<a href="https://colab.research.google.com/github/Khalidaman9555/IDS-AI/blob/main/System_Integration_IDS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title 🧭 Set up project directories and imports
import os
import sys
import json
import time
import random
import pandas as pd
import numpy as np
import pickle # We might need to change this later for model loading
import threading
import argparse # For command-line argument parsing, usually not run directly in Colab cells like this
from datetime import datetime

# --- ACTION REQUIRED: Mount your Google Drive if not already mounted ---
# from google.colab import drive
# drive.mount('/content/drive')

# --- ACTION REQUIRED: Set your project's base directory path ---
# Replace the example path below with your desired Google Drive path.
# If you don't set this, a temporary directory in /content/ will be used,
# and data will NOT persist after your Colab session ends.
BASE_DIR = "/content/drive/MyDrive/IDS_AI_Project" # <<<< CHANGE THIS TO YOUR PREFERRED GDRIVE PATH

# Report which base directory is in effect. A blank value falls back to a
# folder under /content so the rest of the notebook can still run.
if not BASE_DIR or not BASE_DIR.strip():
    print(
        "🛑 BASE_DIR is not set. Please provide a valid Google Drive path.",
        "   Defaulting to a temporary path: /content/ids_ai_project_temp",
        "   ⚠️ Data in this temporary path will NOT persist across Colab sessions.",
        sep="\n",
    )
    BASE_DIR = "/content/ids_ai_project_temp"
elif BASE_DIR == "/content/drive/MyDrive/IDS_AI_Project":
    print(
        "🔔 Using default example BASE_DIR. Make sure this is the path you want in your Google Drive.",
        "   If not, please modify the BASE_DIR variable in this cell before proceeding.",
        sep="\n",
    )
else:
    print(f"✅ BASE_DIR set to: {BASE_DIR}")


# Sub-directories for datasets, trained models and runtime logs.
DATA_DIR, MODEL_DIR, LOG_DIR = (
    os.path.join(BASE_DIR, subfolder) for subfolder in ("data", "models", "logs")
)

# Create the whole tree up front; exist_ok makes re-running this cell harmless.
for directory in (BASE_DIR, DATA_DIR, MODEL_DIR, LOG_DIR):
    os.makedirs(directory, exist_ok=True)

print(
    f"\nProject base directory: {BASE_DIR}",
    f"Data directory: {DATA_DIR}",
    f"Models directory: {MODEL_DIR}",
    f"Logs directory: {LOG_DIR}",
    sep="\n",
)

# Sensor configuration (derived from your Sensors_and_attacks.ipynb).
# One row per sensor: (sensor id, (normal low, normal high), value reported under attack, unit).
_SENSOR_ROWS = (
    ("temperature_1",   (15, 30),     100,  "°C"),
    ("humidity_1",      (30, 60),     99,   "%"),
    ("pressure_1",      (1000, 1020), 500,  "hPa"),
    ("vibration_1",     (0, 0.5),     5.0,  "g"),
    ("plc_register_A",  (0, 100),     999,  "value"),
    ("flow_rate_1",     (50, 150),    0,    "L/min"),
    ("motor_current_1", (1.0, 5.0),   20.0, "A"),
)
SENSOR_CONFIG = {
    sensor_id: {"normal_range": normal_range, "attack_value": attack_value, "unit": unit}
    for sensor_id, normal_range, attack_value, unit in _SENSOR_ROWS
}
print(f"\nLoaded SENSOR_CONFIG: {len(SENSOR_CONFIG)} sensors configured.")

# Attack types (derived from your Sensors_and_attacks.ipynb and script).
# "normal" is listed first and stands for attack-free operation.
ATTACK_TYPES = [
    "normal",
    "tcp_syn_flood",
    "port_scan",
    "sql_injection",
    "udp_flood",
    "arp_spoofing",
    # Affects sensor readings directly.
    "data_manipulation",
    # Affects sensor availability.
    "sensor_outage",
    "command_injection",
    "brute_force",
    # Man-in-the-middle.
    "mitm",
    "replay_attack",
    "backdoor",
    "ransomware",
]
print(f"Loaded ATTACK_TYPES: {len(ATTACK_TYPES)} types defined (including 'normal').")

print("\n✅ Section 1 (Setup and Configuration) is ready to run.")

🔔 Using default example BASE_DIR. Make sure this is the path you want in your Google Drive.
   If not, please modify the BASE_DIR variable in this cell before proceeding.

Project base directory: /content/drive/MyDrive/IDS_AI_Project
Data directory: /content/drive/MyDrive/IDS_AI_Project/data
Models directory: /content/drive/MyDrive/IDS_AI_Project/models
Logs directory: /content/drive/MyDrive/IDS_AI_Project/logs

Loaded SENSOR_CONFIG: 7 sensors configured.
Loaded ATTACK_TYPES: 14 types defined (including 'normal').

✅ Section 1 (Setup and Configuration) is ready to run.


In [None]:
#@title 🌡️ Section 2: Sensor Class Definition
class Sensor:
    """Simulated IoT sensor that produces normal, anomalous, or outage readings.

    Attributes:
        sensor_id: Identifier; also the key looked up in the module-level
            SENSOR_CONFIG to find a configured ``attack_value``.
        sensor_type: Free-form type label stored on the instance.
        normal_range: ``(low, high)`` pair bounding normal readings.
        unit: Unit string copied into every reading.
        current_value: Last value produced (seeded with a normal value).
    """

    def __init__(self, sensor_id, sensor_type, normal_range, unit):
        self.sensor_id = sensor_id
        self.sensor_type = sensor_type
        self.normal_range = normal_range
        self.unit = unit
        # Start from a plausible in-range value so the first normal reading
        # has something to drift from.
        self.current_value = self.generate_normal_value()

    def generate_normal_value(self):
        """Return a uniformly random value inside ``normal_range``, rounded to 2 decimals."""
        return round(random.uniform(self.normal_range[0], self.normal_range[1]), 2)

    def read_value(self, under_attack=False, attack_type=None, attack_intensity=1):
        """Generate one sensor reading.

        Args:
            under_attack: When True the reading is anomalous (or an outage).
            attack_type: Name of the active attack. ``"sensor_outage"`` yields
                a reading with no value; ``"data_manipulation"`` pushes the
                value outside the normal range when the sensor has no
                configured ``attack_value``; any other type adds scaled noise
                around one of the range bounds.
            attack_intensity: Multiplier for the size of the anomaly
                (the simulator passes 1 to 5).

        Returns:
            dict with keys ``timestamp``, ``sensor_id``, ``value``, ``unit``,
            ``status`` (``"normal"``, ``"anomalous"`` or ``"outage"``) and
            ``event_type`` (always ``"sensor_reading"``).
        """
        timestamp = time.time()
        low, high = self.normal_range[0], self.normal_range[1]
        span = high - low

        # An outage must be handled before the configured-attack-value branch.
        # Every sensor in SENSOR_CONFIG defines an attack_value, so checking
        # that first (as the code previously did) made outages unreachable and
        # reported them as ordinary anomalous readings.
        if under_attack and attack_type == "sensor_outage":
            # The last known value is deliberately left untouched.
            return {
                "timestamp": timestamp,
                "sensor_id": self.sensor_id,
                "value": None, # Or use np.nan if pandas/numpy is heavily used later
                "unit": self.unit,
                "status": "outage",
                "event_type": "sensor_reading"
            }

        current_status = "normal"
        generated_value = self.current_value

        if under_attack:
            current_status = "anomalous"
            if self.sensor_id in SENSOR_CONFIG and 'attack_value' in SENSOR_CONFIG[self.sensor_id]:
                # Sensor-specific attack signature: the configured value plus
                # jitter of up to 5% of the normal span per intensity level.
                base_attack_val = SENSOR_CONFIG[self.sensor_id]['attack_value']
                variation_range = span * 0.05 * attack_intensity
                variation = random.uniform(-variation_range, variation_range) if variation_range > 0 else 0
                generated_value = round(base_attack_val + variation, 2)
            elif attack_type == "data_manipulation":
                # No configured value: push the reading beyond one of the
                # bounds. The factor is 0.7 at intensity 1 and 1.5 at intensity 5.
                anomaly_magnitude_factor = 0.5 + (attack_intensity / 5.0)
                if random.random() < 0.5: # deviate below the lower bound
                    generated_value = round(low - (span * anomaly_magnitude_factor * random.uniform(0.5, 1.0)), 2)
                else: # deviate above the upper bound
                    generated_value = round(high + (span * anomaly_magnitude_factor * random.uniform(0.5, 1.0)), 2)
            else:
                # Any other attack: noise of up to 20% of the span per
                # intensity level, centred on a randomly chosen range bound.
                noise_magnitude = span * 0.2 * attack_intensity
                noise = random.uniform(-noise_magnitude, noise_magnitude) if noise_magnitude > 0 else 0
                base_for_noise = low if random.random() < 0.5 else high
                generated_value = round(base_for_noise + noise, 2)
        else:
            if random.random() < 0.7:
                # 70% of the time drift by at most 5% of the span from the
                # previous value, clamped back into the normal range.
                variation = span * 0.05
                noise = random.uniform(-variation, variation) if variation > 0 else 0
                generated_value = round(self.current_value + noise, 2)
                generated_value = max(low, min(high, generated_value))
            else:
                # Otherwise jump to a fresh value anywhere in the normal range.
                generated_value = self.generate_normal_value()

        self.current_value = generated_value # Remember the last value produced

        return {
            "timestamp": timestamp,
            "sensor_id": self.sensor_id,
            "value": self.current_value,
            "unit": self.unit,
            "status": current_status,
            "event_type": "sensor_reading"
        }

# --- Quick demonstration of the Sensor class (optional, for this cell) ---
print("Sensor Class Definition:")
if not SENSOR_CONFIG:
    print("SENSOR_CONFIG is not defined or empty. Cannot run sensor test.")
else:
    # Build one Sensor per configured entry; the type label is the part of the
    # id before the first underscore. IDSSimulator does this formally later.
    temp_sensors_for_testing = [
        Sensor(
            s_id,
            s_id.partition('_')[0] if '_' in s_id else 'unknown',
            s_conf["normal_range"],
            s_conf["unit"],
        )
        for s_id, s_conf in SENSOR_CONFIG.items()
    ]

    if temp_sensors_for_testing:
        print(f"\n--- Testing Sensor Logic with {len(temp_sensors_for_testing)} configured sensors ---")
        # Exercise only the first configured sensor.
        test_sensor = temp_sensors_for_testing[0]
        print(f"\n--- Testing Behavior of Sensor: {test_sensor.sensor_id} ---")
        print("Initial value (after __init__):", test_sensor.current_value)

        # (label, keyword arguments for read_value), executed in order.
        # The last case is an attack type with no sensor-specific handling.
        demo_reads = (
            ("Normal reading 1:", {}),
            ("Normal reading 2:", {}),
            ("Attack (data_manipulation, intensity 2) reading:",
             {"under_attack": True, "attack_type": "data_manipulation", "attack_intensity": 2}),
            ("Attack (sensor_outage) reading:",
             {"under_attack": True, "attack_type": "sensor_outage"}),
            ("Attack (e.g., 'tcp_syn_flood' affecting environment, intensity 1) reading:",
             {"under_attack": True, "attack_type": "tcp_syn_flood", "attack_intensity": 1}),
        )
        for label, read_kwargs in demo_reads:
            time.sleep(0.01) # let a little time pass between readings
            print(label, test_sensor.read_value(**read_kwargs))
        print("--- End Sensor Behavior Test ---")

print("\n✅ Section 2 (Sensor Class Definition) is ready.")

Sensor Class Definition:

--- Testing Sensor Logic with 7 configured sensors ---

--- Testing Behavior of Sensor: temperature_1 ---
Initial value (after __init__): 29.03
Normal reading 1: {'timestamp': 1748475987.239072, 'sensor_id': 'temperature_1', 'value': 16.92, 'unit': '°C', 'status': 'normal', 'event_type': 'sensor_reading'}
Normal reading 2: {'timestamp': 1748475987.2493358, 'sensor_id': 'temperature_1', 'value': 17.1, 'unit': '°C', 'status': 'normal', 'event_type': 'sensor_reading'}
Attack (data_manipulation, intensity 2) reading: {'timestamp': 1748475987.2595518, 'sensor_id': 'temperature_1', 'value': 100.32, 'unit': '°C', 'status': 'anomalous', 'event_type': 'sensor_reading'}
Attack (sensor_outage) reading: {'timestamp': 1748475987.269801, 'sensor_id': 'temperature_1', 'value': 100.71, 'unit': '°C', 'status': 'anomalous', 'event_type': 'sensor_reading'}
Attack (e.g., 'tcp_syn_flood' affecting environment, intensity 1) reading: {'timestamp': 1748475987.2800288, 'sensor_id': 't

In [None]:
# @title 💻 Section 3: NetworkDevice Class Definition
class NetworkDevice:
    """Simulated network device whose logs can reflect attack patterns."""

    # One row per recognised attack:
    #   (attack_type, status reported in the log, alert text,
    #    counter steps applied in order, optional factory for extra log fields)
    # A counter step is (attribute name, base increment, low factor, high factor);
    # the attribute grows by int(base * intensity * uniform(low, high)).
    _ATTACK_PROFILES = (
        ("tcp_syn_flood", "high_load_syn_flood",
         "Abnormal increase in half-open TCP connections.",
         (("connections", 100, 0.8, 1.2), ("traffic_bytes", 5000, 0.8, 1.2)),
         None),
        ("port_scan", "suspicious_port_activity",
         "Rapid connection attempts to multiple ports detected.",
         (("connections", 20, 0.5, 1.5), ("traffic_bytes", 1000, 0.5, 1.5)),
         lambda level: {"scanned_ports_count_estimate": random.randint(10, 100) * level}),
        # UDP is connectionless; 'connections' stands for active sessions/flows here.
        ("udp_flood", "high_udp_traffic_volume",
         "Excessive UDP traffic volume observed.",
         (("traffic_bytes", 10000, 0.8, 1.2), ("connections", 5, 0.5, 1.5)),
         None),
        ("arp_spoofing", "arp_cache_anomaly",
         "Potential ARP cache poisoning: MAC address conflict or unsolicited ARP replies.",
         (),
         lambda level: {"conflicting_mac_example": f"00:1A:2B:3C:4D:{random.randint(10,99):02X}"}),
        ("mitm", "mitm_traffic_redirection",
         "Suspicious traffic redirection or interception; potential MITM.",
         (),
         None),
        ("sql_injection", "sql_injection_attempt_detected",
         "Potentially malicious SQL query pattern detected in application traffic.",
         (),
         lambda level: {"example_sql_pattern": "1' OR '1'='1"}),
        ("command_injection", "command_injection_attempt_detected",
         "Suspected OS command injection attempt in network request.",
         (),
         lambda level: {"example_command_pattern": ";&cat /etc/passwd"}),
        ("brute_force", "brute_force_login_attempts",
         "High rate of failed login attempts detected.",
         (("connections", 10, 0.8, 1.2),),
         lambda level: {"failed_login_attempts_estimate": random.randint(20, 100) * level}),
        ("replay_attack", "replay_attack_suspected",
         "Duplicate or out-of-sequence packets suggesting a replay attack.",
         (),
         None),
        ("backdoor", "unauthorized_outbound_connection",
         "Unusual outbound connection to an unknown or suspicious IP.",
         (),
         lambda level: {"suspicious_destination_ip": f"198.51.100.{random.randint(1,254)}"}),
        ("ransomware", "ransomware_network_activity",
         "Network traffic patterns consistent with ransomware (e.g., C&C, lateral movement).",
         (("traffic_bytes", 2000, 0.8, 1.2),),
         None),
    )

    def __init__(self, device_id, device_type, ip_address):
        self.device_id = device_id
        self.device_type = device_type
        self.ip_address = ip_address
        self.status = "normal" # Status recorded by the most recent log entry
        self.connections = random.randint(5, 50) # Starting connection count
        self.traffic_bytes = random.randint(1000, 100000) # Starting traffic counter in bytes

    def generate_log(self, under_attack=False, attack_type=None, attack_intensity=1):
        """Produce one log entry and update the device's counters.

        Args:
            under_attack: When True the entry carries attack indicators.
            attack_type: Attack name; names missing from the profile table get
                a generic anomalous entry.
            attack_intensity: Multiplier applied to counter growth and to the
                per-attack estimates.

        Returns:
            dict describing the device state for this tick.
        """
        log_entry = {
            "timestamp": time.time(),
            "device_id": self.device_id,
            "ip_address": self.ip_address,
            "event_type": "network_log", # Distinguishes these from sensor events
        }

        if not under_attack:
            # Random walk, floored at 5 connections / 1000 bytes and capped at
            # 200 connections / 500000 bytes.
            self.connections = min(200, max(5, self.connections + random.randint(-5, 5)))
            self.traffic_bytes = min(500000, max(1000, self.traffic_bytes + random.randint(-2000, 2000)))
            entry_status = "normal"
        else:
            log_entry["attack_indicator"] = attack_type
            log_entry["attack_intensity_level"] = attack_intensity

            # Scan by equality rather than hashing so any attack_type value is accepted.
            profile = next(
                (row for row in self._ATTACK_PROFILES if attack_type == row[0]),
                None,
            )
            if profile is None:
                # Generic or unrecognised attack type.
                entry_status = "anomalous_activity_detected"
                alert_text = f"General anomalous network activity of type: {attack_type}"
                counter_steps = (("traffic_bytes", 1000, 0.5, 1.5), ("connections", 10, 0.5, 1.5))
                extra_fields = None
            else:
                _, entry_status, alert_text, counter_steps, extra_fields = profile

            for attr_name, base, low, high in counter_steps:
                growth = int(base * attack_intensity * random.uniform(low, high))
                setattr(self, attr_name, getattr(self, attr_name) + growth)
            if extra_fields is not None:
                log_entry.update(extra_fields(attack_intensity))
            log_entry["alert_message"] = alert_text

        log_entry["active_connections"] = self.connections
        log_entry["traffic_total_bytes"] = self.traffic_bytes
        log_entry["device_operational_status"] = entry_status

        # Keep the instance-level status in step with the latest entry.
        self.status = entry_status

        return log_entry

# --- Quick demonstration of the NetworkDevice class (optional, for this cell) ---
print("NetworkDevice Class Definition:")
test_device = NetworkDevice(device_id="router_core", device_type="router", ip_address="192.168.1.1")
print(f"\n--- Testing Device: {test_device.device_id} ---")

# (label, keyword arguments for generate_log), executed in order.
demo_logs = (
    ("Normal log:", {}),
    ("\nAttack (tcp_syn_flood) log:",
     {"under_attack": True, "attack_type": "tcp_syn_flood", "attack_intensity": 3}),
    ("\nAttack (port_scan) log:",
     {"under_attack": True, "attack_type": "port_scan", "attack_intensity": 2}),
)
for position, (label, log_kwargs) in enumerate(demo_logs):
    if position:
        time.sleep(0.01) # short pause between consecutive log entries
    print(label, json.dumps(test_device.generate_log(**log_kwargs), indent=2))
print("\n--- End NetworkDevice Test ---")

print("\n✅ Section 3 (NetworkDevice Class Definition) is ready.")

NetworkDevice Class Definition:

--- Testing Device: router_core ---
Normal log: {
  "timestamp": 1748475987.3047683,
  "device_id": "router_core",
  "ip_address": "192.168.1.1",
  "event_type": "network_log",
  "active_connections": 48,
  "traffic_total_bytes": 27381,
  "device_operational_status": "normal"
}

Attack (tcp_syn_flood) log: {
  "timestamp": 1748475987.3152955,
  "device_id": "router_core",
  "ip_address": "192.168.1.1",
  "event_type": "network_log",
  "attack_indicator": "tcp_syn_flood",
  "attack_intensity_level": 3,
  "alert_message": "Abnormal increase in half-open TCP connections.",
  "active_connections": 346,
  "traffic_total_bytes": 44206,
  "device_operational_status": "high_load_syn_flood"
}

Attack (port_scan) log: {
  "timestamp": 1748475987.3260686,
  "device_id": "router_core",
  "ip_address": "192.168.1.1",
  "event_type": "network_log",
  "attack_indicator": "port_scan",
  "attack_intensity_level": 2,
  "scanned_ports_count_estimate": 166,
  "alert_messag

In [None]:
#@title 🚀 Section 4: IDSSimulator Class Definition
class IDSSimulator:
    """
    Simulates an IoT environment with sensors and network devices,
    and generates both normal and attack data.

    Sensors are built from the module-level SENSOR_CONFIG; five network
    devices are created with addresses 192.168.1.10 to 192.168.1.14.
    At most one attack is active at a time.
    """

    def __init__(self):
        self.sensors = []
        self.network_devices = []
        self.attack_in_progress = False
        self.current_attack_type = "normal" # Start with normal state
        self.attack_intensity = 0
        self.attack_start_time = None
        self.attack_duration = 0
        self.logs = [] # In-memory copy of every generated data point (grows without bound)
        self.running = False
        self.simulation_thread = None
        # Interval of the most recent run_simulation() call. stop_simulation()
        # reads it to decide how long to wait for the loop to wake up and exit.
        self._data_interval_seconds = None

        # Initialize sensors from SENSOR_CONFIG
        if not SENSOR_CONFIG:
            print("⚠️ SENSOR_CONFIG is empty. No sensors will be initialized.")
        for sensor_id, config in SENSOR_CONFIG.items():
            # Type label is the part of the id before the first underscore.
            sensor_type = sensor_id.split('_')[0] if '_' in sensor_id else 'generic_sensor'
            self.sensors.append(Sensor(
                sensor_id=sensor_id,
                sensor_type=sensor_type,
                normal_range=config["normal_range"],
                unit=config["unit"]
            ))
        print(f"IDSSimulator initialized {len(self.sensors)} sensors.")

        # Initialize some network devices
        num_devices = 5 # Example number of network devices
        for i in range(num_devices):
            self.network_devices.append(NetworkDevice(
                device_id=f"device_{i}",
                device_type=random.choice(["router", "switch", "server", "plc", "gateway"]),
                ip_address=f"192.168.1.{10+i}" # Example IP range
            ))
        print(f"IDSSimulator initialized {len(self.network_devices)} network devices.")

    def start_attack(self, attack_type, duration=60, intensity=1):
        """Start an attack of the specified type for the given duration.

        Args:
            attack_type: Must be a member of ATTACK_TYPES other than "normal";
                anything else is rejected with a message and no state change.
            duration: Seconds after which generate_data_point() ends the attack.
            intensity: Clamped into the range 1 to 5.
        """
        if attack_type not in ATTACK_TYPES or attack_type == "normal":
            print(f"❌ Invalid or 'normal' attack type specified: {attack_type}. Attack not started.")
            return

        self.attack_in_progress = True
        self.current_attack_type = attack_type
        self.attack_intensity = max(1, min(5, intensity))  # Clamp intensity between 1-5
        self.attack_start_time = time.time()
        self.attack_duration = duration

        print(f"💥 Attack started: {self.current_attack_type} (Intensity: {self.attack_intensity}, Duration: {self.attack_duration:.2f}s)")

    def stop_attack(self):
        """Stop any ongoing attack and revert to normal; a no-op when no attack is active."""
        if self.attack_in_progress:
            attack_ended_duration = time.time() - self.attack_start_time
            print(f"🛡️ Attack stopped: {self.current_attack_type} (Actual Duration: {attack_ended_duration:.2f}s)")
            self.attack_in_progress = False
            self.current_attack_type = "normal" # Revert to normal
            self.attack_intensity = 0
            self.attack_start_time = None
            self.attack_duration = 0

    def generate_data_point(self):
        """Generate a single comprehensive data point from all sensors and devices.

        An attack whose duration has elapsed is stopped first, so the returned
        point already reflects the normal state.

        Returns:
            dict with keys ``timestamp``, ``sensors`` (reading per sensor id),
            ``network_devices`` (log per device id) and ``overall_attack_status``.
        """
        current_time = time.time()

        # Check if an ongoing attack should end
        if self.attack_in_progress and (current_time - self.attack_start_time > self.attack_duration):
            self.stop_attack()

        data_point = {
            "timestamp": current_time,
            "sensors": {},
            "network_devices": {},
            "overall_attack_status": {
                "is_attack_ongoing": self.attack_in_progress,
                "active_attack_type": self.current_attack_type,
                "current_attack_intensity": self.attack_intensity,
                "attack_elapsed_time_seconds": (current_time - self.attack_start_time) if self.attack_in_progress else 0
            }
        }

        # Generate sensor readings
        for sensor in self.sensors:
            reading = sensor.read_value(
                under_attack=self.attack_in_progress,
                attack_type=self.current_attack_type,
                attack_intensity=self.attack_intensity
            )
            data_point["sensors"][sensor.sensor_id] = reading

        # Generate network device logs
        for device in self.network_devices:
            log = device.generate_log(
                under_attack=self.attack_in_progress,
                attack_type=self.current_attack_type,
                attack_intensity=self.attack_intensity
            )
            data_point["network_devices"][device.device_id] = log

        return data_point

    def run_simulation(self, duration_seconds=None, data_interval_seconds=1.0, allow_random_attacks=True):
        """
        Run the simulation for the specified duration, generating data at the given interval.
        If allow_random_attacks is True, randomly start attacks during the simulation.

        Args:
            duration_seconds: Total run time; None runs until ``self.running``
                is cleared (see stop_simulation()).
            data_interval_seconds: Sleep between consecutive data points.
            allow_random_attacks: Enables the random attack scheduler.

        Each data point is appended to ``self.logs`` and to a JSON Lines file
        in LOG_DIR. Blocks the calling thread until finished.
        """
        self.running = True
        # Remember the interval so stop_simulation() can wait long enough for
        # the loop to come out of its sleep.
        self._data_interval_seconds = data_interval_seconds
        simulation_start_time = time.time()

        # Determine when the first random attack might start
        # Allow at least some normal behavior before the first attack.
        min_time_to_first_attack = 10 # seconds
        max_time_to_first_attack = 30 # seconds
        next_attack_initiation_time = simulation_start_time + random.uniform(min_time_to_first_attack, max_time_to_first_attack) if allow_random_attacks else float('inf')

        print(f"Simulation run starting. Duration: {duration_seconds if duration_seconds else 'Infinite'}. Interval: {data_interval_seconds}s. Random Attacks: {allow_random_attacks}")
        if allow_random_attacks:
            print(f"Next potential random attack initiation around: {datetime.fromtimestamp(next_attack_initiation_time).strftime('%Y-%m-%d %H:%M:%S')}")

        log_file_path = os.path.join(LOG_DIR, f"ids_runtime_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl")
        print(f"Real-time logs will be written to: {log_file_path}")

        try:
            while self.running:
                current_loop_time = time.time()
                if duration_seconds is not None and (current_loop_time - simulation_start_time >= duration_seconds):
                    print("Specified simulation duration reached.")
                    break

                data_point = self.generate_data_point()
                self.logs.append(data_point) # Append to in-memory log (optional, can be removed for long runs)

                # Save individual data point to log file (JSON Lines format)
                try:
                    with open(log_file_path, 'a') as f:
                        f.write(json.dumps(data_point) + '\n')
                except IOError as e:
                    # Best effort: report and keep simulating.
                    print(f"Error writing to log file {log_file_path}: {e}")

                # Randomly start attacks if enabled and no attack is currently in progress
                if allow_random_attacks and not self.attack_in_progress and current_loop_time >= next_attack_initiation_time:
                    # Exclude "normal" from random attack choices
                    possible_attack_types = [at for at in ATTACK_TYPES if at != "normal"]
                    if possible_attack_types:
                        chosen_attack_type = random.choice(possible_attack_types)
                        attack_duration = random.uniform(10, 20) # Shorter, more frequent attacks for testing
                        attack_intensity = random.randint(1, 3) # Varied intensity
                        self.start_attack(chosen_attack_type, attack_duration, attack_intensity)

                        # Schedule the next attack for after this one ends plus a quiet pause.
                        min_pause_after_attack = 15
                        max_pause_after_attack = 40
                        next_attack_initiation_time = current_loop_time + attack_duration + random.uniform(min_pause_after_attack, max_pause_after_attack)
                        print(f"Next potential random attack initiation around: {datetime.fromtimestamp(next_attack_initiation_time).strftime('%Y-%m-%d %H:%M:%S')}")
                    else:
                        print("No attack types available to choose from (excluding 'normal').")
                        next_attack_initiation_time = float('inf') # No more attacks

                time.sleep(data_interval_seconds)
        finally:
            self.stop_attack() # Ensure any active attack is marked as stopped
            self.running = False # Explicitly set running to false
            print("Simulation run finished.")


    def start_simulation_thread(self, duration_seconds=None, data_interval_seconds=1.0, allow_random_attacks=True):
        """Start run_simulation() in a daemon thread; does nothing if one is already alive."""
        if self.simulation_thread and self.simulation_thread.is_alive():
            print("⚠️ Simulation thread is already running.")
            return

        self.simulation_thread = threading.Thread(
            target=self.run_simulation,
            args=(duration_seconds, data_interval_seconds, allow_random_attacks)
        )
        self.simulation_thread.daemon = True # Allows main program to exit even if thread is running
        self.simulation_thread.start()
        print(f"🚀 Simulation thread started. Random attacks: {allow_random_attacks}. Interval: {data_interval_seconds}s.")

    def stop_simulation(self):
        """Signal the simulation loop to stop and wait for its thread to finish.

        The loop only notices the stop request after its current sleep, so the
        wait is twice the interval recorded by run_simulation(), with a floor
        of 2 seconds. The floor is also used when no interval is known.
        """
        print("Attempting to stop simulation...")
        self.running = False # Signal the run_simulation loop to stop
        if self.simulation_thread and self.simulation_thread.is_alive():
            # The earlier code tested `'data_interval_seconds' in locals()`,
            # which is never true inside this method, so the wait was always
            # 2 s no matter how long the loop was sleeping.
            interval = getattr(self, "_data_interval_seconds", None)
            join_timeout = max(2.0, interval * 2) if interval else 2.0
            self.simulation_thread.join(timeout=join_timeout) # Wait for the thread to finish
            if self.simulation_thread.is_alive():
                print("⚠️ Simulation thread did not terminate cleanly after timeout.")
            else:
                print("✅ Simulation thread stopped.")
        else:
            print("ℹ️ Simulation thread was not running or already stopped.")
        self.stop_attack() # Also ensure any direct attack state is cleared

    def save_collected_logs(self, filename=None):
        """Save all in-memory logs collected during the session to a single JSON file.

        Args:
            filename: Target path; defaults to a timestamped file in DATA_DIR.

        Returns:
            The path written, or None when there is nothing to save or the
            write fails.
        """
        if not self.logs:
            print("No in-memory logs to save.")
            return None

        if filename is None:
            # Use DATA_DIR defined in Section 1
            filename = os.path.join(DATA_DIR, f"ids_collected_session_logs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")

        try:
            with open(filename, 'w') as f:
                json.dump(self.logs, f, indent=2)
            print(f"📚 In-memory logs successfully saved to {filename} ({len(self.logs)} entries).")
            return filename
        except IOError as e:
            print(f"❌ Error saving in-memory logs to {filename}: {e}")
            return None

# --- Quick demonstration of the IDSSimulator class (optional, for this cell) ---
# The demo runs inside Colab or when the file is executed directly; a plain
# import only announces that the class was loaded.
if not (('google.colab' in sys.modules) or (__name__ == "__main__")):
    print("IDSSimulator Class Definition (Loaded as module).")
else:
    print("\nIDSSimulator Class Definition (with testing if run as main):")
    simulator_test_instance = IDSSimulator()

    # One data point with no attack active.
    print("\n--- Testing single data point generation ---")
    single_point = simulator_test_instance.generate_data_point()
    print(json.dumps(single_point, indent=2))

    # A short attack, sampled three times while it is still active.
    print("\n--- Testing attack simulation ---")
    simulator_test_instance.start_attack(attack_type="tcp_syn_flood", duration=5, intensity=2)
    for i in range(3):
        print(f"\nData point during attack (iteration {i+1}):")
        point_during_attack = simulator_test_instance.generate_data_point()
        # Only the attack summary is shown; the per-sensor and per-device
        # entries live under point_during_attack["sensors"] / ["network_devices"].
        print(json.dumps(point_during_attack["overall_attack_status"], indent=2))
        time.sleep(0.1) # small delay for readability

    # generate_data_point() ends an attack once its duration has elapsed. This
    # demo does not wait that long; it announces the duration and stops the
    # attack explicitly instead.
    print(f"\nWaiting for attack to auto-stop (simulated {simulator_test_instance.attack_duration}s)...")

    simulator_test_instance.stop_attack() # explicit cleanup
    print("\n--- End IDSSimulator Test ---")

print("\n✅ Section 4 (IDSSimulator Class Definition) is ready.")


IDSSimulator Class Definition (with testing if run as main):
IDSSimulator initialized 7 sensors.
IDSSimulator initialized 5 network devices.

--- Testing single data point generation ---
{
  "timestamp": 1748475987.3643665,
  "sensors": {
    "temperature_1": {
      "timestamp": 1748475987.3643703,
      "sensor_id": "temperature_1",
      "value": 19.77,
      "unit": "\u00b0C",
      "status": "normal",
      "event_type": "sensor_reading"
    },
    "humidity_1": {
      "timestamp": 1748475987.3643792,
      "sensor_id": "humidity_1",
      "value": 49.17,
      "unit": "%",
      "status": "normal",
      "event_type": "sensor_reading"
    },
    "pressure_1": {
      "timestamp": 1748475987.3643866,
      "sensor_id": "pressure_1",
      "value": 1014.26,
      "unit": "hPa",
      "status": "normal",
      "event_type": "sensor_reading"
    },
    "vibration_1": {
      "timestamp": 1748475987.3643913,
      "sensor_id": "vibration_1",
      "value": 0.38,
      "unit": "g",
 

In [None]:
# @title 🧠 Section 5: IDSModelPredictor Class Definition
#Ensure necessary imports if running this cell standalone
import os
import sys # <<<< ADDED THIS IMPORT
import json
import time
import random # Should be available from Section 1
import pandas as pd
import numpy as np
import pickle
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from datetime import datetime # For printing timestamps in predict_on_stream

class IDSModelPredictor:
    """
    Loads a trained Keras model and its scaler, then classifies simulator data points.

    A data point (dict produced by the simulator) is flattened into the ordered
    ``feature_columns`` vector, scaled, reshaped to ``(1, 1, n_features)`` and
    passed to the model. Output index 1 is reported as "Attack", any other
    index as "Normal".

    Attributes:
        model: The loaded model, or None if no path was given or loading failed.
        feature_columns: Ordered feature names used to assemble the input vector.
        scaler: The loaded scaler, or a new unfitted StandardScaler as a fallback.
        predictions: Result dicts appended by ``predict``.
        running: Flag polled by the streaming loop; cleared by ``stop_predictions``.
        prediction_thread: Background thread created by ``start_prediction_thread``.
    """

    def __init__(self, model_path=None, feature_columns=None, scaler_path=None):
        """
        Args:
            model_path: Path handed to ``tf.keras.models.load_model``; optional.
            feature_columns: Ordered list of feature names expected by the model.
            scaler_path: Path to a pickled, pre-fitted scaler; optional.
        """
        self.model = None
        self.feature_columns = feature_columns
        self.scaler = None
        self.predictions = []
        self.running = False
        self.prediction_thread = None

        if not self.feature_columns:
            print("🔴 CRITICAL WARNING: 'feature_columns' list was not provided to IDSModelPredictor. "
                  "The 'preprocess_data' method will not function correctly without it.")

        if model_path:
            self.load_model(model_path)
        else:
            print("🔴 WARNING: No model_path provided to IDSModelPredictor. Model not loaded.")

        if scaler_path:
            self.load_scaler(scaler_path)
        else:
            print("🔴 WARNING: No scaler_path provided to IDSModelPredictor. Initializing a new StandardScaler. "
                  "This is for placeholder/testing only. For accurate predictions, "
                  "the scaler used during training MUST be loaded and used.")
            self.scaler = StandardScaler() # Fallback, not ideal

    def load_model(self, model_path):
        """Load a trained Keras model from ``model_path``.

        Returns:
            True on success; False (with ``self.model`` reset to None) on any error.
        """
        try:
            self.model = tf.keras.models.load_model(model_path)
            print(f"✅ Model successfully loaded from {model_path}")
            return True
        except Exception as e:
            print(f"❌ Error loading Keras model from {model_path}: {e}")
            self.model = None
            return False

    def load_scaler(self, scaler_path):
        """Load a pre-fitted scaler object from a pickle file.

        On failure an unfitted StandardScaler is installed so later code still
        has a scaler object to work with (predictions will then be unreliable).
        """
        try:
            # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
            # file. Only point scaler_path at artefacts you produced yourself.
            with open(scaler_path, 'rb') as f:
                self.scaler = pickle.load(f)
            print(f"✅ Scaler successfully loaded from {scaler_path}")
            # 'mean_' only exists after fit() on a StandardScaler.
            if not (hasattr(self.scaler, 'mean_') and self.scaler.mean_ is not None):
                 print("🔶 WARNING: Loaded scaler does not appear to be fitted (missing 'mean_' attribute). Preprocessing might fail or be incorrect.")
        except Exception as e:
            print(f"❌ Error loading scaler from {scaler_path}: {e}")
            print("   A new, unfitted StandardScaler will be used as a fallback (suboptimal).")
            if not self.scaler:
                 self.scaler = StandardScaler()


    def preprocess_data(self, data_point):
        """Map one simulator data point onto the model's scaled input tensor.

        Args:
            data_point: Dict that may contain "network_devices", "sensors" and
                "overall_attack_status" entries.

        Returns:
            Array of shape ``(1, 1, len(feature_columns))``, or None when
            ``feature_columns`` or the scaler is missing, or scaling fails.
        """
        if not self.feature_columns:
            print("🔴 ERROR: IDSModelPredictor.feature_columns list is not set. Cannot preprocess.")
            return None

        # Every expected feature starts at 0.0; only a few are overwritten below.
        raw_feature_values = {name: 0.0 for name in self.feature_columns}

        # --- START USER IMPLEMENTATION TO MAP SIMULATED DATA TO MODEL FEATURES ---
        # This is a placeholder and needs significant adaptation by you.
        network_devices_data = data_point.get("network_devices", {})
        sensors_data = data_point.get("sensors", {})
        overall_status = data_point.get("overall_attack_status", {})
        active_attack = overall_status.get("active_attack_type", "normal")

        if network_devices_data:
            all_traffic = [d.get("traffic_total_bytes", 0) for d in network_devices_data.values()]
            if 'tcp.len' in raw_feature_values:
                # Mean traffic per device stands in for the TCP payload length.
                raw_feature_values['tcp.len'] = sum(all_traffic) / len(all_traffic) if all_traffic else 0.0
            if 'tcp.dstport' in raw_feature_values:
                 raw_feature_values['tcp.dstport'] = 80.0 if "http" in active_attack.lower() or "sql" in active_attack.lower() else 0.0
            if 'tcp.connection.syn' in raw_feature_values and "syn_flood" in active_attack.lower():
                raw_feature_values['tcp.connection.syn'] = 1.0

        if 'mbtcp.len' in raw_feature_values and "plc_register_A" in sensors_data:
            plc_val = sensors_data["plc_register_A"].get("value")
            raw_feature_values['mbtcp.len'] = float(plc_val) if plc_val is not None else 0.0
        # --- END USER IMPLEMENTATION ---

        feature_vector_list = [raw_feature_values.get(col_name, 0.0) for col_name in self.feature_columns]
        features_df = pd.DataFrame([feature_vector_list], columns=self.feature_columns)

        if not self.scaler:
            print("🔴 ERROR: Scaler not available in preprocess_data. Cannot scale features.")
            return None

        try:
            is_scaler_fitted = hasattr(self.scaler, 'mean_') and self.scaler.mean_ is not None
            if not is_scaler_fitted:
                 # Fitting on a single row is only a way to keep the demo running.
                 print("⚠️ SCALER NOT FITTED. Fitting with current data sample. DEMO ONLY. Load pre-fitted scaler.")
                 scaled_features_array = self.scaler.fit_transform(features_df)
            else:
                 scaled_features_array = self.scaler.transform(features_df)
        except Exception as e:
            print(f"🔴 ERROR during feature scaling: {e}. Features: {features_df.to_dict()}")
            return None

        # (batch=1, timesteps=1, n_features)
        reshaped_features = scaled_features_array.reshape((1, 1, len(self.feature_columns)))
        return reshaped_features

    def predict(self, data_point):
        """Classify one data point and record the outcome in ``self.predictions``.

        Returns:
            Result dict (timestamp, predicted class, probabilities, simulator
            ground truth, correctness flag), or None when the model is missing
            or preprocessing / inference fails.
        """
        if self.model is None: return None
        try:
            # Guarded so that a malformed data point is reported and skipped
            # instead of raising out of the background streaming thread.
            processed_input = self.preprocess_data(data_point)
        except Exception as e:
            print(f"❌ Error during preprocessing: {e}")
            return None
        if processed_input is None: return None
        try:
            prediction_probs = self.model.predict(processed_input, verbose=0)[0]
            predicted_class_index = np.argmax(prediction_probs)
            predicted_class_name = "Attack" if predicted_class_index == 1 else "Normal"
            actual_attack_type = data_point.get("overall_attack_status", {}).get("active_attack_type", "normal")
            is_actually_attack = data_point.get("overall_attack_status", {}).get("is_attack_ongoing", False)
            actual_label_for_comparison = "Attack" if is_actually_attack else "Normal"
            result = {
                "timestamp": data_point.get("timestamp", time.time()),
                "predicted_class": predicted_class_name,
                "prediction_probabilities": prediction_probs.tolist(),
                "actual_type_from_simulator": actual_attack_type,
                "is_prediction_correct": predicted_class_name == actual_label_for_comparison
            }
            self.predictions.append(result)
            return result
        except Exception as e:
            print(f"❌ Error during model prediction: {e}")
            return None

    def _prediction_loop(self, simulator_instance, data_interval_seconds):
        """Poll ``simulator_instance.logs`` and predict on new entries until ``self.running`` is cleared."""
        print(f"🤖 Prediction thread started. Checking simulator data every {data_interval_seconds}s.")
        last_processed_log_timestamp = 0

        while self.running:
            # Snapshot the log list so the simulator can keep appending meanwhile.
            current_simulator_logs = list(simulator_instance.logs)
            new_logs_to_process = [log for log in current_simulator_logs if log.get('timestamp', 0) > last_processed_log_timestamp]

            if new_logs_to_process:
                for data_point in new_logs_to_process:
                    if not self.running: break
                    prediction_result = self.predict(data_point)
                    if prediction_result:
                        ts_human = datetime.fromtimestamp(prediction_result['timestamp']).strftime('%H:%M:%S')
                        probs_str = ", ".join([f"{p:.2f}" for p in prediction_result['prediction_probabilities']])
                        print(f"🔎 Prediction @ {ts_human}: Pred={prediction_result['predicted_class']}, ActualSimType='{prediction_result['actual_type_from_simulator']}' (Probs: [{probs_str}])")
                    last_processed_log_timestamp = data_point.get('timestamp', last_processed_log_timestamp)
            time.sleep(data_interval_seconds)
        print("🤖 Prediction thread finished.")

    def run_predictions_on_stream(self, simulator_instance, data_interval_seconds=1.0):
        """Run the prediction loop in the calling thread until ``self.running`` becomes False."""
        self.running = True
        self._data_interval_seconds_for_thread = data_interval_seconds
        self._prediction_loop(simulator_instance, data_interval_seconds)

    def start_prediction_thread(self, simulator_instance, data_interval_seconds=1.0):
        """Start the prediction loop in a daemon thread (no-op if no model or already running)."""
        if not self.model: print("❌ Cannot start prediction: Model not loaded."); return
        if self.prediction_thread and self.prediction_thread.is_alive(): print("⚠️ Prediction thread already running."); return
        self._data_interval_seconds_for_thread = data_interval_seconds
        # Raise the flag here, not inside the worker: otherwise a stop_predictions()
        # issued before the worker gets scheduled would be overwritten by the
        # worker's own "running = True" and the loop would never terminate.
        self.running = True
        self.prediction_thread = threading.Thread(target=self._prediction_loop, args=(simulator_instance, data_interval_seconds))
        self.prediction_thread.daemon = True
        self.prediction_thread.start()

    def stop_predictions(self):
        """Clear the running flag and wait (bounded) for the background thread to exit."""
        print("Attempting to stop prediction thread...")
        self.running = False
        if self.prediction_thread and self.prediction_thread.is_alive():
            # The loop may be mid-sleep; allow two polling intervals plus slack.
            timeout_wait = getattr(self, '_data_interval_seconds_for_thread', 1.0) * 2 + 1
            self.prediction_thread.join(timeout=max(2.0, timeout_wait))
            if self.prediction_thread.is_alive(): print("⚠️ Prediction thread did not terminate cleanly.")
            else: print("✅ Prediction thread stopped.")
        else: print("ℹ️ Prediction thread was not running or already stopped.")

    def save_predictions(self, filename=None):
        """Write ``self.predictions`` as JSON.

        Args:
            filename: Target path; defaults to a timestamped file under the
                notebook-level ``DATA_DIR``.

        Returns:
            The path written, or None if there was nothing to save or writing failed.
        """
        if not self.predictions: print("No predictions to save."); return None
        if filename is None:
            # Use DATA_DIR from Section 1
            filename = os.path.join(DATA_DIR, f"ids_model_predictions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
        try:
            with open(filename, 'w') as f: json.dump(self.predictions, f, indent=2)
            print(f"📚 Predictions saved to {filename} ({len(self.predictions)} entries).")
            return filename
        except IOError as e: print(f"❌ Error saving predictions: {e}"); return None

# --- Predictor configuration: artefact locations and model input schema ---
YOUR_MODEL_PATH = "/content/drive/MyDrive/Colab Notebooks/results/lstm_model.h5"
YOUR_SCALER_PATH = "/content/drive/MyDrive/Colab Notebooks/results/scaler.pkl"

# preprocess_data assembles the feature vector in exactly this list order.
FEATURE_NAMES_FOR_MODEL = [
    # ARP / HTTP
    'arp.hw.size',
    'http.content_length', 'http.response', 'http.tls_port',
    # TCP
    'tcp.ack_raw', 'tcp.checksum',
    'tcp.connection.fin', 'tcp.connection.rst', 'tcp.connection.syn', 'tcp.connection.synack',
    'tcp.dstport', 'tcp.flags.ack', 'tcp.len',
    # UDP
    'udp.stream', 'udp.time_delta',
    # DNS
    'dns.qry.qu', 'dns.qry.type',
    'dns.retransmission', 'dns.retransmit_request', 'dns.retransmit_request_in',
    # MQTT
    'mqtt.conflag.cleansess', 'mqtt.hdrflags', 'mqtt.len', 'mqtt.msg_decoded_as',
    # Modbus/TCP
    'mbtcp.len', 'mbtcp.trans_id', 'mbtcp.unit_id',
]
print("ℹ️ IDSModelPredictor configured with {} features.".format(len(FEATURE_NAMES_FOR_MODEL)))

# Early, non-fatal sanity checks on the configured artefact paths.
if not os.path.exists(YOUR_MODEL_PATH):
    print("🛑 CRITICAL WARNING: Model file NOT FOUND at {}.".format(YOUR_MODEL_PATH))

if not YOUR_SCALER_PATH:
    print("🔶 INFO: YOUR_SCALER_PATH is not set. Predictor will use a new, unfitted scaler (predictions will be inaccurate).")
elif not os.path.exists(YOUR_SCALER_PATH):
    print(
        "🛑 WARNING: Scaler file NOT FOUND at {}. ".format(YOUR_SCALER_PATH)
        + "Predictor will use a new, unfitted scaler (predictions will be inaccurate)."
    )

# --- Smoke test for IDSModelPredictor ---
# Runs when executed as a script or inside a Colab session; it loads the
# configured model/scaler and classifies one freshly simulated data point.
if (__name__ == "__main__") or ('google.colab' in sys.modules):
    print("\nIDSModelPredictor Class (Testing Block):")

    if FEATURE_NAMES_FOR_MODEL and os.path.exists(YOUR_MODEL_PATH):
        print("\n--- Initializing components for Predictor Test ---")
        # The simulator classes come from earlier notebook sections.
        if {"IDSSimulator", "Sensor", "NetworkDevice"}.issubset(locals()):
            test_simulator_for_predictor_test_block = IDSSimulator()

            predictor_test_instance = IDSModelPredictor(
                model_path=YOUR_MODEL_PATH,
                feature_columns=FEATURE_NAMES_FOR_MODEL,
                scaler_path=YOUR_SCALER_PATH,
            )

            if not (predictor_test_instance.model and predictor_test_instance.scaler):
                print("\n    Predictor test skipped: Model or Scaler could not be loaded/initialized properly within IDSModelPredictor. Check paths and file integrity.")
            else:
                print("\n--- Testing IDSModelPredictor.predict() method with one sample ---")

                test_data_point = test_simulator_for_predictor_test_block.generate_data_point()

                print("\n    Sample simulated data point (structure preview):")
                print("    Timestamp: {}".format(test_data_point.get('timestamp')))
                print("    Overall attack status: {}".format(test_data_point.get('overall_attack_status')))

                print("\n    Attempting to preprocess and predict the sample data point...")
                prediction_output = predictor_test_instance.predict(test_data_point)

                if not prediction_output:
                    print("\n    Prediction failed or returned None. Review `preprocess_data` and scaler status.")
                else:
                    print("\n    Prediction Output (Sample):")
                    print(json.dumps(prediction_output, indent=2))
        else:
            print("🔴 ERROR: IDSSimulator, Sensor, or NetworkDevice class not found. Please ensure Sections 2, 3, and 4 were run successfully.")
    elif not FEATURE_NAMES_FOR_MODEL:
        print("\n⚠️ Predictor test cannot run: FEATURE_NAMES_FOR_MODEL is empty.")
    else:
        print("\n⚠️ Predictor test cannot run: Model file not found at '{}'.".format(YOUR_MODEL_PATH))

    print("\n--- End IDSModelPredictor Test ---")

print("\n✅ Section 5 (IDSModelPredictor Class Definition) is ready.")

ℹ️ IDSModelPredictor configured with 27 features.

IDSModelPredictor Class (Testing Block):

--- Initializing components for Predictor Test ---
IDSSimulator initialized 7 sensors.
IDSSimulator initialized 5 network devices.




✅ Model successfully loaded from /content/drive/MyDrive/Colab Notebooks/results/lstm_model.h5
✅ Scaler successfully loaded from /content/drive/MyDrive/Colab Notebooks/results/scaler.pkl

--- Testing IDSModelPredictor.predict() method with one sample ---

    Sample simulated data point (structure preview):
    Timestamp: 1748475987.8551893
    Overall attack status: {'is_attack_ongoing': False, 'active_attack_type': 'normal', 'current_attack_intensity': 0, 'attack_elapsed_time_seconds': 0}

    Attempting to preprocess and predict the sample data point...

    Prediction Output (Sample):
{
  "timestamp": 1748475987.8551893,
  "predicted_class": "Attack",
  "prediction_probabilities": [
    6.583260412501568e-13,
    1.0
  ],
  "actual_type_from_simulator": "normal",
  "is_prediction_correct": false
}

--- End IDSModelPredictor Test ---

✅ Section 5 (IDSModelPredictor Class Definition) is ready.


In [None]:
# Ensure necessary imports are available (should be from previous cells)
# import os, sys, json, time, random, pandas as pd, numpy as np, pickle
# import tensorflow as tf
# from sklearn.preprocessing import StandardScaler
# from datetime import datetime

class IDSModelPredictor:
    """
    Loads a trained Keras model and its scaler, then classifies simulator data points.

    A data point (dict produced by the simulator) is mapped onto the ordered
    ``feature_columns`` vector by ``preprocess_data`` using rough, hand-written
    approximations, scaled, reshaped to ``(1, 1, n_features)`` and passed to the
    model. Output index 1 is reported as "Attack", any other index as "Normal".

    Attributes:
        model: The loaded model, or None if no path was given or loading failed.
        feature_columns: Ordered feature names used to assemble the input vector.
        scaler: The loaded scaler, or a new unfitted StandardScaler as a fallback.
        predictions: Result dicts appended by ``predict``.
        running: Flag polled by the streaming loop; cleared by ``stop_predictions``.
        prediction_thread: Background thread created by ``start_prediction_thread``.
    """

    def __init__(self, model_path=None, feature_columns=None, scaler_path=None):
        """
        Args:
            model_path: Path handed to ``tf.keras.models.load_model``; optional.
            feature_columns: Ordered list of feature names expected by the model.
            scaler_path: Path to a pickled, pre-fitted scaler; optional.
        """
        self.model = None
        self.feature_columns = feature_columns
        self.scaler = None
        self.predictions = []
        self.running = False
        self.prediction_thread = None

        if not self.feature_columns:
            print("🔴 CRITICAL WARNING: 'feature_columns' list was not provided to IDSModelPredictor. "
                  "The 'preprocess_data' method will not function correctly without it.")

        if model_path:
            self.load_model(model_path)
        else:
            print("🔴 WARNING: No model_path provided to IDSModelPredictor. Model not loaded.")

        if scaler_path:
            self.load_scaler(scaler_path)
        else:
            print("🔴 WARNING: No scaler_path provided. Initializing a new StandardScaler. "
                  "This is for placeholder/testing only. Load the original scaler for accurate predictions.")
            self.scaler = StandardScaler()

    def load_model(self, model_path):
        """Load a trained Keras model from ``model_path``.

        Returns:
            True on success; False (with ``self.model`` reset to None) on any error.
        """
        try:
            self.model = tf.keras.models.load_model(model_path)
            print(f"✅ Model successfully loaded from {model_path}")
            return True
        except Exception as e:
            print(f"❌ Error loading Keras model from {model_path}: {e}")
            self.model = None
            return False

    def load_scaler(self, scaler_path):
        """Load a pre-fitted scaler object from a pickle file.

        On failure an unfitted StandardScaler is installed so later code still
        has a scaler object to work with (predictions will then be unreliable).
        """
        try:
            # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
            # file. Only point scaler_path at artefacts you produced yourself.
            with open(scaler_path, 'rb') as f:
                self.scaler = pickle.load(f)
            print(f"✅ Scaler successfully loaded from {scaler_path}")
            # 'mean_' only exists after fit() on a StandardScaler.
            if not (hasattr(self.scaler, 'mean_') and self.scaler.mean_ is not None):
                 print("🔶 WARNING: Loaded scaler does not appear to be fitted. Preprocessing may be incorrect.")
        except Exception as e:
            print(f"❌ Error loading scaler from {scaler_path}: {e}")
            if not self.scaler: self.scaler = StandardScaler()

    def preprocess_data(self, data_point):
        """Map one simulator data point onto the model's scaled input tensor.

        The mapping from high-level simulator state to packet-level features is
        a set of rough approximations; features with no sensible proxy stay 0.0.

        Args:
            data_point: Dict that may contain "network_devices", "sensors" and
                "overall_attack_status" entries.

        Returns:
            Array of shape ``(1, 1, len(feature_columns))``, or None when
            ``feature_columns`` or the scaler is missing, or scaling fails.
        """
        if not self.feature_columns:
            print("🔴 ERROR: IDSModelPredictor.feature_columns list is not set. Cannot preprocess.")
            return None

        # Initialize all expected features with a default value (0.0)
        raw_feature_values = {feature_name: 0.0 for feature_name in self.feature_columns}

        # Extract data from the simulator's data_point
        network_devices_data = data_point.get("network_devices", {})
        sensors_data = data_point.get("sensors", {})
        overall_status = data_point.get("overall_attack_status", {})
        active_attack = overall_status.get("active_attack_type", "normal")
        is_attack_ongoing = overall_status.get("is_attack_ongoing", False)
        attack_intensity = overall_status.get("current_attack_intensity", 1)

        # --- Network-based features (approximations) ---
        if network_devices_data:
            # Aggregate some values from all devices
            total_traffic_bytes = sum(d.get("traffic_total_bytes", 0) for d in network_devices_data.values())
            total_active_connections = sum(d.get("active_connections", 0) for d in network_devices_data.values())

            # 'tcp.len': bytes per connection as a proxy; +1 avoids division by zero.
            raw_feature_values['tcp.len'] = total_traffic_bytes / (total_active_connections + 1)

            # 'tcp.dstport': very speculative; a representative target port per attack.
            # NOTE: UDP floods deliberately do not set this TCP-only feature (the
            # former "udp_flood + dns" branch could never match and was removed).
            if is_attack_ongoing:
                if active_attack == "sql_injection": raw_feature_values['tcp.dstport'] = 80.0
                elif active_attack == "tcp_syn_flood": raw_feature_values['tcp.dstport'] = 80.0 # Common target
                elif active_attack == "port_scan":
                    # A scan touches many ports; use a placeholder low port.
                    raw_feature_values['tcp.dstport'] = 1.0 # Just an example
            else: # Normal
                raw_feature_values['tcp.dstport'] = 0.0 # Or a typical port for normal traffic if known

            # 'tcp.connection.syn': raised for SYN floods and port scans, scaled by intensity.
            if active_attack in ["tcp_syn_flood", "port_scan"]:
                raw_feature_values['tcp.connection.syn'] = 1.0 * attack_intensity

            # 'tcp.flags.ack': very rough guess based on connection volume.
            raw_feature_values['tcp.flags.ack'] = 1.0 if total_active_connections > 10 else 0.0

        # --- Sensor-based features (Modbus/TCP, typically PLC traffic) ---
        plc_sensor_id = "plc_register_A"
        if plc_sensor_id in sensors_data:
            plc_reading = sensors_data[plc_sensor_id]
            if plc_reading and plc_reading.get("value") is not None:
                raw_feature_values['mbtcp.len'] = float(plc_reading.get("value", 0.0)) # Assuming value maps to length
                raw_feature_values['mbtcp.trans_id'] = float(plc_reading.get("timestamp", 0.0) % 1000) # Using timestamp as a proxy for transaction ID
                raw_feature_values['mbtcp.unit_id'] = 1.0 # Assuming unit ID 1 for this PLC

        # 'arp.hw.size': typically 6 for Ethernet; zeroed while ARP spoofing is simulated.
        raw_feature_values['arp.hw.size'] = 6.0 if active_attack != "arp_spoofing" else 0.0

        # HTTP features for the HTTP-borne attacks. The previous condition also
        # required "http" to appear in the attack name, which is impossible for
        # these two names, so the HTTP features were never populated.
        if active_attack in ("sql_injection", "command_injection"):
            raw_feature_values['http.content_length'] = random.uniform(50,500) # Example
            raw_feature_values['http.response'] = 1.0 # Assuming a response is generated

        # 'tcp.ack_raw', 'tcp.checksum', 'tcp.connection.fin', 'tcp.connection.rst',
        # 'tcp.connection.synack' need packet-level simulation and stay 0.0.

        # UDP specific
        if active_attack == "udp_flood":
            raw_feature_values['udp.stream'] = float(attack_intensity) # Example mapping
            raw_feature_values['udp.time_delta'] = random.uniform(0.001, 0.1) # Example

        # DNS ('dns.*') and MQTT ('mqtt.*') features would require protocol
        # simulation and therefore stay 0.0.

        # Final vector assembly (keys not in feature_columns are ignored here)
        feature_vector_list = [raw_feature_values.get(col_name, 0.0) for col_name in self.feature_columns]
        features_df = pd.DataFrame([feature_vector_list], columns=self.feature_columns)

        if not self.scaler:
            print("🔴 ERROR: Scaler not available in preprocess_data. Cannot scale features.")
            return None

        try:
            is_scaler_fitted = hasattr(self.scaler, 'mean_') and self.scaler.mean_ is not None
            if not is_scaler_fitted:
                 # Fitting on a single row is only a way to keep the demo running.
                 print("⚠️ SCALER NOT FITTED. Fitting with current data sample. DEMO ONLY. Load pre-fitted scaler.")
                 scaled_features_array = self.scaler.fit_transform(features_df)
            else:
                 scaled_features_array = self.scaler.transform(features_df)
        except Exception as e:
            print(f"🔴 ERROR during feature scaling: {e}. Features: {features_df.to_dict()}")
            return None

        # (batch=1, timesteps=1, n_features)
        reshaped_features = scaled_features_array.reshape((1, 1, len(self.feature_columns)))
        return reshaped_features

    def predict(self, data_point):
        """Classify one data point and record the outcome in ``self.predictions``.

        Returns:
            Result dict (timestamp, predicted class, probabilities, simulator
            ground truth, correctness flag), or None when the model is missing
            or preprocessing / inference fails.
        """
        if self.model is None: return None
        try:
            # Guarded so that a malformed data point is reported and skipped
            # instead of raising out of the background streaming thread.
            processed_input = self.preprocess_data(data_point)
        except Exception as e:
            print(f"❌ Error during preprocessing: {e}")
            return None
        if processed_input is None: return None
        try:
            prediction_probs = self.model.predict(processed_input, verbose=0)[0]
            predicted_class_index = np.argmax(prediction_probs)
            predicted_class_name = "Attack" if predicted_class_index == 1 else "Normal"
            actual_attack_type = data_point.get("overall_attack_status", {}).get("active_attack_type", "normal")
            is_actually_attack = data_point.get("overall_attack_status", {}).get("is_attack_ongoing", False)
            actual_label_for_comparison = "Attack" if is_actually_attack else "Normal"
            result = {
                "timestamp": data_point.get("timestamp", time.time()),
                "predicted_class": predicted_class_name,
                "prediction_probabilities": prediction_probs.tolist(), # [prob_normal, prob_attack]
                "actual_type_from_simulator": actual_attack_type,
                "is_prediction_correct": predicted_class_name == actual_label_for_comparison
            }
            self.predictions.append(result)
            return result
        except Exception as e:
            print(f"❌ Error during model prediction: {e}")
            return None

    def _prediction_loop(self, simulator_instance, data_interval_seconds):
        """Poll ``simulator_instance.logs`` and predict on new entries until ``self.running`` is cleared."""
        print(f"🤖 Prediction thread started. Checking simulator data every {data_interval_seconds}s.")
        last_processed_log_timestamp = 0

        while self.running:
            # Snapshot the log list so the simulator can keep appending meanwhile.
            current_simulator_logs = list(simulator_instance.logs)
            new_logs_to_process = [log for log in current_simulator_logs if log.get('timestamp', 0) > last_processed_log_timestamp]

            if new_logs_to_process:
                for data_point in new_logs_to_process:
                    if not self.running: break
                    prediction_result = self.predict(data_point)
                    if prediction_result:
                        ts_human = datetime.fromtimestamp(prediction_result['timestamp']).strftime('%H:%M:%S')
                        probs_str = ", ".join([f"{p:.2f}" for p in prediction_result['prediction_probabilities']])
                        print(f"🔎 Prediction @ {ts_human}: Pred={prediction_result['predicted_class']}, ActualSimType='{prediction_result['actual_type_from_simulator']}' (Probs: [{probs_str}])")
                    last_processed_log_timestamp = data_point.get('timestamp', last_processed_log_timestamp)
            time.sleep(data_interval_seconds)
        print("🤖 Prediction thread finished.")

    def run_predictions_on_stream(self, simulator_instance, data_interval_seconds=1.0):
        """Run the prediction loop in the calling thread until ``self.running`` becomes False."""
        self.running = True
        self._data_interval_seconds_for_thread = data_interval_seconds
        self._prediction_loop(simulator_instance, data_interval_seconds)

    def start_prediction_thread(self, simulator_instance, data_interval_seconds=1.0):
        """Start the prediction loop in a daemon thread (no-op if no model or already running)."""
        if not self.model: print("❌ Cannot start prediction: Model not loaded."); return
        if self.prediction_thread and self.prediction_thread.is_alive(): print("⚠️ Prediction thread already running."); return
        self._data_interval_seconds_for_thread = data_interval_seconds
        # Raise the flag here, not inside the worker: otherwise a stop_predictions()
        # issued before the worker gets scheduled would be overwritten by the
        # worker's own "running = True" and the loop would never terminate.
        self.running = True
        self.prediction_thread = threading.Thread(target=self._prediction_loop, args=(simulator_instance, data_interval_seconds))
        self.prediction_thread.daemon = True
        self.prediction_thread.start()

    def stop_predictions(self):
        """Clear the running flag and wait (bounded) for the background thread to exit."""
        print("Attempting to stop prediction thread...")
        self.running = False
        if self.prediction_thread and self.prediction_thread.is_alive():
            # The loop may be mid-sleep; allow two polling intervals plus slack.
            timeout_wait = getattr(self, '_data_interval_seconds_for_thread', 1.0) * 2 + 1
            self.prediction_thread.join(timeout=max(2.0, timeout_wait))
            if self.prediction_thread.is_alive(): print("⚠️ Prediction thread did not terminate cleanly.")
            else: print("✅ Prediction thread stopped.")
        else: print("ℹ️ Prediction thread was not running or already stopped.")

    def save_predictions(self, filename=None):
        """Write ``self.predictions`` as JSON.

        Args:
            filename: Target path; defaults to a timestamped file under the
                notebook-level ``DATA_DIR``.

        Returns:
            The path written, or None if there was nothing to save or writing failed.
        """
        if not self.predictions: print("No predictions to save."); return None
        if filename is None:
            # Use DATA_DIR from Section 1
            filename = os.path.join(DATA_DIR, f"ids_model_predictions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
        try:
            with open(filename, 'w') as f: json.dump(self.predictions, f, indent=2)
            print(f"📚 Predictions saved to {filename} ({len(self.predictions)} entries).")
            return filename
        except IOError as e: print(f"❌ Error saving predictions: {e}"); return None

# --- Predictor configuration (artefacts produced by the earlier MLP_LSTM training run) ---
YOUR_MODEL_PATH = "/content/drive/MyDrive/Colab Notebooks/results/lstm_model.h5"
YOUR_SCALER_PATH = "/content/drive/MyDrive/Colab Notebooks/results/scaler.pkl"

# preprocess_data assembles the feature vector in exactly this list order.
FEATURE_NAMES_FOR_MODEL = [
    # ARP / HTTP
    'arp.hw.size',
    'http.content_length', 'http.response', 'http.tls_port',
    # TCP
    'tcp.ack_raw', 'tcp.checksum',
    'tcp.connection.fin', 'tcp.connection.rst', 'tcp.connection.syn', 'tcp.connection.synack',
    'tcp.dstport', 'tcp.flags.ack', 'tcp.len',
    # UDP
    'udp.stream', 'udp.time_delta',
    # DNS
    'dns.qry.qu', 'dns.qry.type',
    'dns.retransmission', 'dns.retransmit_request', 'dns.retransmit_request_in',
    # MQTT
    'mqtt.conflag.cleansess', 'mqtt.hdrflags', 'mqtt.len', 'mqtt.msg_decoded_as',
    # Modbus/TCP
    'mbtcp.len', 'mbtcp.trans_id', 'mbtcp.unit_id',
]
print("ℹ️ IDSModelPredictor configured with {} features.".format(len(FEATURE_NAMES_FOR_MODEL)))

# Early, non-fatal sanity checks on the configured artefact paths.
if not os.path.exists(YOUR_MODEL_PATH):
    print("🛑 CRITICAL WARNING: Model file NOT FOUND at {}.".format(YOUR_MODEL_PATH))

if not YOUR_SCALER_PATH:
    print("🔶 INFO: YOUR_SCALER_PATH is not set. Predictor will use a new, unfitted scaler (not recommended for accuracy).")
elif not os.path.exists(YOUR_SCALER_PATH):  # only checked when a path is configured
    print("🛑 WARNING: Scaler file NOT FOUND at {}.".format(YOUR_SCALER_PATH))


# --- Smoke test for IDSModelPredictor ---
# Runs when executed as a script or inside a Colab session; it classifies one
# normal and one tcp_syn_flood data point produced by a fresh simulator.
if (__name__ == "__main__") or ('google.colab' in sys.modules):
    print("\nIDSModelPredictor Class (Testing Block with Improved Preprocessing Attempt):")

    if FEATURE_NAMES_FOR_MODEL and os.path.exists(YOUR_MODEL_PATH):
        print("\n--- Initializing components for Predictor Test ---")
        # The simulator classes come from earlier notebook sections.
        if {"IDSSimulator", "Sensor", "NetworkDevice"}.issubset(locals()):
            test_simulator_for_predictor_test_block = IDSSimulator()

            predictor_test_instance = IDSModelPredictor(
                model_path=YOUR_MODEL_PATH,
                feature_columns=FEATURE_NAMES_FOR_MODEL,
                scaler_path=YOUR_SCALER_PATH,
            )

            if not (predictor_test_instance.model and predictor_test_instance.scaler):
                print("\n    Predictor test skipped: Model or Scaler could not be loaded/initialized properly.")
            else:
                print("\n--- Testing IDSModelPredictor.predict() with one sample using updated preprocess_data ---")

                # Case 1: no attack active.
                print("\n   Generating a NORMAL data point for testing...")
                test_data_point_normal = test_simulator_for_predictor_test_block.generate_data_point()
                prediction_normal = predictor_test_instance.predict(test_data_point_normal)
                if prediction_normal:
                    print("    Prediction for NORMAL event:")
                    print(json.dumps(prediction_normal, indent=2))

                # Case 2: sample taken while a SYN flood is active, then the
                # attack is stopped again before predicting.
                print("\n   Generating an ATTACK data point for testing (tcp_syn_flood)...")
                test_simulator_for_predictor_test_block.start_attack(
                    attack_type="tcp_syn_flood", duration=1, intensity=3
                )
                test_data_point_attack = test_simulator_for_predictor_test_block.generate_data_point()
                test_simulator_for_predictor_test_block.stop_attack()
                prediction_attack = predictor_test_instance.predict(test_data_point_attack)
                if prediction_attack:
                    print("    Prediction for ATTACK event (tcp_syn_flood):")
                    print(json.dumps(prediction_attack, indent=2))

                print(
                    "\n   Note: The accuracy of these predictions is highly dependent on how well the "
                    "simulated features in `preprocess_data` match the real dataset features your model learned."
                )
        else:
            print("🔴 ERROR: Prerequisite classes (IDSSimulator, Sensor, NetworkDevice) not found. Ensure Sections 1-4 ran.")
    elif not FEATURE_NAMES_FOR_MODEL:
        print("\n⚠️ Predictor test cannot run: FEATURE_NAMES_FOR_MODEL is empty.")
    else:
        print("\n⚠️ Predictor test cannot run: Model file not found at '{}'.".format(YOUR_MODEL_PATH))

    print("\n--- End IDSModelPredictor Test ---")

print("\n✅ Section 5 (IDSModelPredictor Class Definition with improved preprocess_data) is ready.")



ℹ️ IDSModelPredictor configured with 27 features.

IDSModelPredictor Class (Testing Block with Improved Preprocessing Attempt):

--- Initializing components for Predictor Test ---
IDSSimulator initialized 7 sensors.
IDSSimulator initialized 5 network devices.
✅ Model successfully loaded from /content/drive/MyDrive/Colab Notebooks/results/lstm_model.h5
✅ Scaler successfully loaded from /content/drive/MyDrive/Colab Notebooks/results/scaler.pkl

--- Testing IDSModelPredictor.predict() with one sample using updated preprocess_data ---

   Generating a NORMAL data point for testing...
    Prediction for NORMAL event:
{
  "timestamp": 1748475988.416281,
  "predicted_class": "Attack",
  "prediction_probabilities": [
    1.8337467508899863e-06,
    0.9999982118606567
  ],
  "actual_type_from_simulator": "normal",
  "is_prediction_correct": false
}

   Generating an ATTACK data point for testing (tcp_syn_flood)...
💥 Attack started: tcp_syn_flood (Intensity: 3, Duration: 1.00s)
🛡️ Attack stopped

In [None]:
# Ensure necessary imports if running this cell standalone
import os
import sys # For sys.executable
import json # For saving results
import time
import threading
from datetime import datetime # For filenames
# Sensor, NetworkDevice, IDSSimulator, IDSModelPredictor classes should be defined
# from running the previous cells (Sections 2, 3, 4, 5).
# BASE_DIR, DATA_DIR, LOG_DIR, YOUR_MODEL_PATH, YOUR_SCALER_PATH, FEATURE_NAMES_FOR_MODEL
# should also be defined from previous cells (Sections 1 and 5).

class IDSOrchestrator:
    """
    Orchestrates the IDS components: simulator, model predictor, and (optionally) a dashboard.

    The class reads the notebook-level globals ``IDSSimulator``, ``IDSModelPredictor`` and
    ``BASE_DIR`` at construction time, and ``DATA_DIR`` / ``LOG_DIR`` when saving results,
    so the cells that define them must have been run first.

    Attributes:
        simulator: ``IDSSimulator`` instance, or ``None`` if that class is not defined.
        predictor: ``IDSModelPredictor`` instance, or ``None`` if that class is not defined.
        dashboard_script_path: ``BASE_DIR`` joined with ``dashboard_script_name``.
        running: set to ``True`` by a successful ``start()`` and to ``False`` by ``stop()``.
        dashboard_process: ``subprocess.Popen`` handle of the dashboard, or ``None``.
    """

    def __init__(self, model_path_from_config, feature_columns_from_config, scaler_path_from_config, dashboard_script_name="dashboard.py"):
        """
        Create the simulator and the predictor and remember where the dashboard script lives.

        Args:
            model_path_from_config: forwarded to ``IDSModelPredictor`` as ``model_path``.
            feature_columns_from_config: forwarded to ``IDSModelPredictor`` as ``feature_columns``.
            scaler_path_from_config: forwarded to ``IDSModelPredictor`` as ``scaler_path``.
            dashboard_script_name: file name of the dashboard script, resolved against ``BASE_DIR``.
        """
        # Initialize simulator
        if 'IDSSimulator' not in globals():
            print("🔴 ERROR: IDSSimulator class definition not found. Please run Section 4.")
            self.simulator = None
        else:
            self.simulator = IDSSimulator() # This will print its own init messages

        # Initialize predictor
        if 'IDSModelPredictor' not in globals():
            print("🔴 ERROR: IDSModelPredictor class definition not found. Please run Section 5.")
            self.predictor = None
        else:
            self.predictor = IDSModelPredictor(
                model_path=model_path_from_config,
                feature_columns=feature_columns_from_config,
                scaler_path=scaler_path_from_config
            ) # This will print its own init messages for model and scaler

        # BASE_DIR should be globally available from Section 1
        self.dashboard_script_path = os.path.join(BASE_DIR, dashboard_script_name)
        self.running = False
        self.dashboard_process = None
        print("✅ IDSOrchestrator initialized.")
        if not self.simulator: print("   - Simulator: Not initialized (IDSSimulator class missing)")
        if not self.predictor: print("   - Predictor: Not initialized (IDSModelPredictor class missing)")
        # Further checks for model/scaler loading are done by IDSModelPredictor's init

    def start(self, simulation_duration_seconds=None, data_interval_seconds=1.0, allow_random_attacks=True, start_dashboard=False):
        """
        Start the IDS system: simulation, prediction, and optionally the dashboard.

        Args:
            simulation_duration_seconds: passed to the simulator as ``duration_seconds``;
                a falsy value is reported as 'Continuous' in the start-up message.
            data_interval_seconds: interval handed to both the simulator and the predictor.
            allow_random_attacks: passed through to the simulator.
            start_dashboard: when True, try to launch the dashboard script as a subprocess.

        Returns:
            None. If the simulator or the predictor is missing, a message is printed and
            nothing is started.
        """
        if not self.simulator or not self.predictor:
            print("❌ Orchestrator cannot start: Simulator or Predictor not properly initialized (check previous cell outputs).")
            return

        self.running = True
        print(f"\n🚀 Starting IDS Orchestration... (Duration: {simulation_duration_seconds or 'Continuous'}, Data Interval: {data_interval_seconds}s)")

        # Start simulator thread
        self.simulator.start_simulation_thread(
            duration_seconds=simulation_duration_seconds,
            data_interval_seconds=data_interval_seconds,
            allow_random_attacks=allow_random_attacks
        )

        # Start predictor thread (only if model was successfully loaded in predictor's init)
        if self.predictor.model and self.predictor.scaler:
            # Prediction interval can be the same as data interval, or different if desired
            self.predictor.start_prediction_thread(
                simulator_instance=self.simulator, # Pass the simulator instance
                data_interval_seconds=data_interval_seconds # Predictor checks for new logs at this interval
            )
        else:
            print("🔶 INFO: Model or Scaler not loaded in predictor. Real-time predictions will not run.")
            print("   Please check model/scaler paths and ensure they loaded correctly in Section 5.")

        # Start dashboard (optional)
        if start_dashboard:
            self._launch_dashboard()

        print("✅ IDS Orchestration components (Simulator & Predictor Threads) initiated.")

    def _launch_dashboard(self):
        """Spawn the dashboard script as a child process and report an immediate failure."""
        if not os.path.exists(self.dashboard_script_path):
            print(f"🔶 INFO: Dashboard script not found at '{self.dashboard_script_path}'. Dashboard not started.")
            return

        try:
            import subprocess
            print(f"Attempting to start dashboard: {self.dashboard_script_path}")
            # For Colab, running Streamlit this way is complex for viewing.
            # It's better to run dashboard.py in its own dedicated environment/process.
            # NOTE(review): stdout/stderr are piped but only read if the process dies within
            # the first few seconds; a long-running, chatty dashboard could fill the pipe
            # buffers and block. Consider redirecting to log files instead.
            self.dashboard_process = subprocess.Popen(
                [sys.executable, self.dashboard_script_path],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE
            )
            print(f"✅ Dashboard process initiated (PID: {self.dashboard_process.pid}). Monitor its logs separately.")
            # Check if it errored out immediately (very basic check)
            time.sleep(3) # Give it a moment
            if self.dashboard_process.poll() is not None:
                stdout, stderr = self.dashboard_process.communicate()
                print(f"⚠️ Dashboard process terminated early. RC: {self.dashboard_process.returncode}")
                print(f"   Dashboard stdout: {stdout.decode(errors='ignore')}")
                print(f"   Dashboard stderr: {stderr.decode(errors='ignore')}")
                self.dashboard_process = None
        except Exception as e:
            print(f"❌ Error starting dashboard subprocess: {e}")
            self.dashboard_process = None

    def stop(self):
        """
        Stop all components of the IDS system.

        Signals the predictor and the simulator to stop, waits up to 5 seconds for each of
        their threads, and then terminates the dashboard subprocess if one was started.
        """
        print("\n🛑 Stopping IDS Orchestration...")
        self.running = False

        if self.predictor:
            self.predictor.stop_predictions() # Signals predictor's internal loop to stop
        if self.simulator:
            self.simulator.stop_simulation() # Signals simulator's internal loop to stop

        # Wait for threads to actually finish (join them). getattr keeps this tolerant of
        # components whose thread attribute was never created.
        prediction_thread = getattr(self.predictor, "prediction_thread", None) if self.predictor else None
        if prediction_thread and prediction_thread.is_alive():
            print("   Waiting for prediction thread to complete...")
            prediction_thread.join(timeout=5.0)
        simulation_thread = getattr(self.simulator, "simulation_thread", None) if self.simulator else None
        if simulation_thread and simulation_thread.is_alive():
            print("   Waiting for simulation thread to complete...")
            simulation_thread.join(timeout=5.0)

        if self.dashboard_process:
            self._shutdown_dashboard()

        print("✅ IDS Orchestration fully stopped.")

    def _shutdown_dashboard(self):
        """Terminate the dashboard subprocess, escalating to kill() if it ignores terminate()."""
        # subprocess must be imported in this scope: the only other import is local to the
        # dashboard launch path, so referencing subprocess.TimeoutExpired here without it
        # would raise NameError exactly when the process refuses to exit.
        import subprocess
        try:
            print("Terminating dashboard process...")
            self.dashboard_process.terminate()
            self.dashboard_process.wait(timeout=5)
            print("✅ Dashboard process terminated.")
        except subprocess.TimeoutExpired:
            print("⚠️ Dashboard process did not terminate gracefully, killing...")
            self.dashboard_process.kill()
            self.dashboard_process.wait() # Reap the killed child so it does not linger as a zombie
            print("✅ Dashboard process killed.")
        except Exception as e:
            print(f"❌ Error stopping dashboard process: {e}")
        finally:
            self.dashboard_process = None

    def save_session_results(self, base_filename_prefix=None):
        """
        Save all collected logs and predictions from the session.

        Args:
            base_filename_prefix: prefix for the output file names inside ``DATA_DIR``;
                defaults to ``ids_session_<YYYYmmdd_HHMMSS>``.

        Returns:
            dict with the keys 'simulator_collected_logs' and/or 'model_predictions' for
            every component whose save call returned a truthy value.
        """
        if base_filename_prefix is None:
            # datetime should be available from Section 1 imports
            base_filename_prefix = f"ids_session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        saved_files = {}
        print("\n💾 Saving session results...")
        # DATA_DIR and LOG_DIR should be globally available from Section 1
        if self.simulator:
            # This saves the self.logs list (in-memory copy) from the simulator
            sim_log_file = self.simulator.save_collected_logs(
                os.path.join(DATA_DIR, f"{base_filename_prefix}_simulator_collected_all.json")
            )
            if sim_log_file: saved_files['simulator_collected_logs'] = sim_log_file
            print(f"   Note: Continuous real-time logs from simulator were also saved to individual files in: {LOG_DIR}/")

        if self.predictor:
            pred_file = self.predictor.save_predictions(
                os.path.join(DATA_DIR, f"{base_filename_prefix}_model_predictions.json")
            )
            if pred_file: saved_files['model_predictions'] = pred_file

        if saved_files:
            print(f"✅ Session results saved. Check files: {saved_files}")
        else:
            print("ℹ️ No results to save or saving failed for all components.")
        return saved_files

# --- Smoke test for IDSOrchestrator ---
# Executes only inside Colab or when this file is run as a script.
if (__name__ == "__main__") or ('google.colab' in sys.modules):
    print("\nIDSOrchestrator Class (Testing Block):")

    # Names that the earlier sections must already have put into the global namespace.
    required_globals = ['BASE_DIR', 'YOUR_MODEL_PATH', 'YOUR_SCALER_PATH', 'FEATURE_NAMES_FOR_MODEL',
                        'IDSSimulator', 'IDSModelPredictor', 'Sensor', 'NetworkDevice']
    prerequisites_ok = True
    for req_var in required_globals:
        if req_var in globals():
            continue
        # Only the first missing name is reported; the scan stops there.
        print(f"🔴 ERROR: Prerequisite '{req_var}' not found in global scope. "
              "Please ensure all previous sections (1-5) were run successfully.")
        prerequisites_ok = False
        break

    # The model file must exist; the scaler file is only checked when a path was configured.
    if prerequisites_ok:
        model_file_missing = not os.path.exists(YOUR_MODEL_PATH)
        scaler_file_missing = bool(YOUR_SCALER_PATH) and not os.path.exists(YOUR_SCALER_PATH)
        if model_file_missing or scaler_file_missing:
            print("🔴 ERROR: Model or Scaler file path is invalid or file not found. Orchestrator test cannot proceed meaningfully.")
            if model_file_missing:
                print(f"   Missing Model: {YOUR_MODEL_PATH}")
            if scaler_file_missing:
                print(f"   Missing Scaler: {YOUR_SCALER_PATH}")
            prerequisites_ok = False

    if not prerequisites_ok:
        print("🔴 Orchestrator test skipped due to missing prerequisites or files.")
    else:
        print("\n--- Initializing Orchestrator for Test ---")
        orchestrator_test_instance = IDSOrchestrator(
            model_path_from_config=YOUR_MODEL_PATH,       # Defined in Section 5
            feature_columns_from_config=FEATURE_NAMES_FOR_MODEL, # Defined in Section 5
            scaler_path_from_config=YOUR_SCALER_PATH         # Defined in Section 5
        )

        # All four pieces (simulator, predictor, its model, its scaler) must be present.
        components_ready = (
            orchestrator_test_instance.simulator
            and orchestrator_test_instance.predictor
            and orchestrator_test_instance.predictor.model
            and orchestrator_test_instance.predictor.scaler
        )
        if not components_ready:
            print("🔴 Orchestrator test skipped: Simulator, Predictor, Model, or Scaler failed to initialize properly within Orchestrator. Check earlier logs.")
        else:
            print("\n--- Starting Orchestrator Test (short duration: 7s, interval: 1s) ---")
            # The dashboard stays off here to avoid Colab subprocess issues with Streamlit.
            orchestrator_test_instance.start(
                simulation_duration_seconds=7,
                data_interval_seconds=1,
                allow_random_attacks=True,
                start_dashboard=False
            )

            print(f"\n⏳ Orchestrator test running for ~7 seconds... Check console for simulation and prediction logs.")
            # Worker threads run in the background; sleep slightly longer than the 7s
            # simulation so their log lines can appear before shutdown begins.
            time.sleep(8)

            print("\n--- Stopping Orchestrator Test ---")
            orchestrator_test_instance.stop()

            print("\n--- Saving Orchestrator Test Session Results ---")
            orchestrator_test_instance.save_session_results("orchestrator_test_session")

    print("\n--- End IDSOrchestrator Test ---")

print("\n✅ Section 6 (IDSOrchestrator Class Definition) is ready.")




IDSOrchestrator Class (Testing Block):

--- Initializing Orchestrator for Test ---
IDSSimulator initialized 7 sensors.
IDSSimulator initialized 5 network devices.
✅ Model successfully loaded from /content/drive/MyDrive/Colab Notebooks/results/lstm_model.h5
✅ Scaler successfully loaded from /content/drive/MyDrive/Colab Notebooks/results/scaler.pkl
✅ IDSOrchestrator initialized.

--- Starting Orchestrator Test (short duration: 7s, interval: 1s) ---

🚀 Starting IDS Orchestration... (Duration: 7, Data Interval: 1s)
Simulation run starting. Duration: 7. Interval: 1s. Random Attacks: True
Next potential random attack initiation around: 2025-05-28 23:46:42
Real-time logs will be written to: /content/drive/MyDrive/IDS_AI_Project/logs/ids_runtime_log_20250528_234629.jsonl
🚀 Simulation thread started. Random attacks: True. Interval: 1s.
🤖 Prediction thread started. Checking simulator data every 1s.
✅ IDS Orchestration components (Simulator & Predictor Threads) initiated.

⏳ Orchestrator test ru



🔎 Prediction @ 23:46:29: Pred=Attack, ActualSimType='normal' (Probs: [0.00, 1.00])
🔎 Prediction @ 23:46:30: Pred=Attack, ActualSimType='normal' (Probs: [0.00, 1.00])
🔎 Prediction @ 23:46:31: Pred=Attack, ActualSimType='normal' (Probs: [0.00, 1.00])
🔎 Prediction @ 23:46:32: Pred=Attack, ActualSimType='normal' (Probs: [0.00, 1.00])
🔎 Prediction @ 23:46:33: Pred=Attack, ActualSimType='normal' (Probs: [0.00, 1.00])
🔎 Prediction @ 23:46:34: Pred=Attack, ActualSimType='normal' (Probs: [0.00, 1.00])
Specified simulation duration reached.
Simulation run finished.
🔎 Prediction @ 23:46:35: Pred=Attack, ActualSimType='normal' (Probs: [0.00, 1.00])

--- Stopping Orchestrator Test ---

🛑 Stopping IDS Orchestration...
Attempting to stop prediction thread...
🤖 Prediction thread finished.
✅ Prediction thread stopped.
Attempting to stop simulation...
ℹ️ Simulation thread was not running or already stopped.
✅ IDS Orchestration fully stopped.

--- Saving Orchestrator Test Session Results ---

💾 Saving se

In [None]:
# Ensure necessary imports if running this cell standalone
# (os, sys, json, time, threading, datetime, argparse should be imported from Section 1)
# All classes (Sensor, NetworkDevice, IDSSimulator, IDSModelPredictor, IDSOrchestrator)
# and global configurations (BASE_DIR, YOUR_MODEL_PATH, etc.) should be defined from previous cells.

def main():
    """
    Main function to initialize and run the IDS system based on command-line arguments.
    In a Colab environment, we will simulate these arguments.

    The function validates that the required notebook globals exist *before* reading them,
    builds an ``IDSOrchestrator``, runs it for the configured duration, and always stops it
    (and optionally saves the session results) on the way out.

    Returns:
        None. Configuration problems are reported with a printed message and an early return.
    """
    print("--- Initializing IDS System (via main function) ---")

    # Check prerequisite globals first: reading an undefined name while building Args would
    # raise NameError and this friendly message would never be shown.
    if 'YOUR_MODEL_PATH' not in globals() or \
       'FEATURE_NAMES_FOR_MODEL' not in globals(): # YOUR_SCALER_PATH can be None
        print("🔴 ERROR: Critical configuration variables (YOUR_MODEL_PATH, FEATURE_NAMES_FOR_MODEL) are not defined.")
        print("   Please ensure Section 5 was run and these variables are set.")
        return

    if 'IDSOrchestrator' not in globals():
        print("🔴 ERROR: IDSOrchestrator class definition not found. Please run Section 6.")
        return

    # For Colab, we'll define args manually instead of parsing them from command line
    # In a standalone .py script, argparse would parse them.
    class Args:
        def __init__(self):
            # --- Use the paths and configurations we've established ---
            self.model = YOUR_MODEL_PATH                # Defined in Section 5
            # The scaler is optional, so an undefined global is treated like None.
            self.scaler = globals().get('YOUR_SCALER_PATH')
            self.feature_list = FEATURE_NAMES_FOR_MODEL # Defined in Section 5

            # --- Simulation Parameters (you can adjust these for testing) ---
            self.duration = 15  # Run for 15 seconds for this test
            self.interval = 1.0 # Generate data every 1 second
            self.no_random_attacks = False # Allow random attacks during the 15s run
            self.start_dashboard_process = False # Keep dashboard off for Colab test simplicity
            self.save_results_at_end = True # Save results after this run

    args = Args() # Simulate parsed arguments

    # `not args.model` guards against None/empty, which os.path.exists would reject or mis-handle.
    if not args.model or not os.path.exists(args.model):
        print(f"🔴 ERROR: Model file not found at the specified path: {args.model}")
        print("   Please ensure the model path is correct and the file exists.")
        return

    if args.scaler and not os.path.exists(args.scaler):
        print(f"🔶 WARNING: Scaler file not found at {args.scaler}. Predictor will use a new, unfitted scaler.")

    # Create and configure orchestrator
    orchestrator = IDSOrchestrator(
        model_path_from_config=args.model,
        feature_columns_from_config=args.feature_list,
        scaler_path_from_config=args.scaler
        # dashboard_script_name can be customized if needed, default is "dashboard.py"
    )

    if not orchestrator.simulator or not orchestrator.predictor or not orchestrator.predictor.model:
        print("🔴 ERROR: Orchestrator components (simulator, predictor, or model) failed to initialize. Cannot start.")
        return

    try:
        print("\n--- Starting IDS System Operation (via main function) ---")
        orchestrator.start(
            simulation_duration_seconds=args.duration,
            data_interval_seconds=args.interval,
            allow_random_attacks=(not args.no_random_attacks),
            start_dashboard=args.start_dashboard_process
        )

        # Let the simulation run.
        # The orchestrator's threads will manage the duration.
        # We'll wait for a bit longer than the duration to allow for shutdown messages.
        if args.duration:
            print(f"\n⏳ Simulation will run for {args.duration} seconds. Monitoring threads...")
            # Check if threads are alive periodically
            main_thread_start_time = time.time()
            while time.time() - main_thread_start_time < (args.duration + 5): # Wait duration + buffer
                # Leave early only once BOTH worker threads exist and have finished.
                if orchestrator.simulator.simulation_thread and not orchestrator.simulator.simulation_thread.is_alive() and \
                   orchestrator.predictor.prediction_thread and not orchestrator.predictor.prediction_thread.is_alive():
                    print("ℹ️ Simulator and Predictor threads have completed their work based on duration.")
                    break
                time.sleep(1) # Check status every second
            print("ℹ️ Main function wait period complete.")
        else:
            # If running continuously (no duration), this part of main would typically not be reached
            # unless an external signal (like Ctrl+C in a script) stops it.
            # For Colab, continuous run would need manual interruption of the cell.
            print("ℹ️ Simulation set to run continuously (or until cell is interrupted).")
            # In a real script, you'd have a loop here or just let threads run until KeyboardInterrupt.
            # For this Colab cell, we'll just let the threads run for a bit if duration is None.
            if orchestrator.running: # Check if it actually started
                 time.sleep(10) # Example wait for a "continuous" test run

    except KeyboardInterrupt:
        print("\n🚨 KeyboardInterrupt received! Stopping IDS system...")
    except Exception as e:
        print(f"❌ An unexpected error occurred in main: {e}")
    finally:
        print("\n--- Shutting Down IDS System (via main function) ---")
        orchestrator.stop() # Ensure components are stopped

        if args.save_results_at_end:
            print("\n--- Saving Final Session Results (via main function) ---")
            orchestrator.save_session_results(f"ids_main_run_{datetime.now().strftime('%Y%m%d_%H%M%S')}")

    print("\n🏁 IDS System Main Execution Finished.")

# --- Entry point for this cell ---
# In Colab (or when executed as a script) main() is invoked right away for testing.
if (__name__ == "__main__") or ('google.colab' in sys.modules):
    print("\nRunning main() function for a demonstration of the IDS System...")
    # main() relies on the classes and configuration created by Sections 1-6, so confirm
    # that those cells have been executed in this session before calling it.
    essential_globals_for_main = ['YOUR_MODEL_PATH', 'YOUR_SCALER_PATH', 'FEATURE_NAMES_FOR_MODEL', 'BASE_DIR', 'DATA_DIR', 'LOG_DIR', 'IDSSimulator', 'IDSModelPredictor', 'IDSOrchestrator']
    all_essentials_defined = True
    for var_name in essential_globals_for_main:
        if var_name in globals():
            continue
        # Stop at the first missing name; one message is enough to point at the problem.
        print(f"🔴 CRITICAL ERROR for main(): Global variable '{var_name}' is not defined. "
              "Please ensure all previous code sections (1-6) have been run successfully.")
        all_essentials_defined = False
        break

    if not all_essentials_defined:
        print("\n❌ Main function execution aborted due to missing definitions from previous sections.")
    else:
        main()

else:
    print("Script loaded as a module, not running main() automatically.")

print("\n✅ Section 7 (Main Execution Block) is ready.")




Running main() function for a demonstration of the IDS System...
--- Initializing IDS System (via main function) ---
IDSSimulator initialized 7 sensors.
IDSSimulator initialized 5 network devices.
✅ Model successfully loaded from /content/drive/MyDrive/Colab Notebooks/results/lstm_model.h5
✅ Scaler successfully loaded from /content/drive/MyDrive/Colab Notebooks/results/scaler.pkl
✅ IDSOrchestrator initialized.

--- Starting IDS System Operation (via main function) ---

🚀 Starting IDS Orchestration... (Duration: 15, Data Interval: 1.0s)
Simulation run starting. Duration: 15. Interval: 1.0s. Random Attacks: True
Next potential random attack initiation around: 2025-05-28 23:47:03
Real-time logs will be written to: /content/drive/MyDrive/IDS_AI_Project/logs/ids_runtime_log_20250528_234637.jsonl
🚀 Simulation thread started. Random attacks: True. Interval: 1.0s.
🤖 Prediction thread started. Checking simulator data every 1.0s.
✅ IDS Orchestration components (Simulator & Predictor Threads) in

In [None]:
# Ensure necessary imports if running this cell standalone
# os, sys, json, time, threading, datetime, argparse were imported in Section 1.
# All classes (Sensor, NetworkDevice, IDSSimulator, IDSModelPredictor, IDSOrchestrator)
# and global configurations (BASE_DIR, YOUR_MODEL_PATH, etc.) should be defined
# from running the previous cells.

def main():
    """
    Build an IDSOrchestrator from the notebook's global configuration, run it, and shut it down.

    No command line is parsed: the settings that a standalone script would take from
    argparse are hard-coded on a small ``Args`` holder instead.
    """
    class Args:
        # Paths and feature list are expected to be globals defined in Section 5.
        model = YOUR_MODEL_PATH
        scaler = YOUR_SCALER_PATH
        feature_list = FEATURE_NAMES_FOR_MODEL

        # Simulation parameters (adjust for different test runs).
        duration = 20                     # seconds to run this demonstration
        interval = 1.0                    # seconds between generated data points
        no_random_attacks = False         # random attacks stay enabled
        start_dashboard_process = False   # dashboard kept off for Colab simplicity
        save_results_at_end = True        # persist results once the run is over

    args = Args()

    print("--- Initializing IDS System (via main function) ---")

    # The model path and the feature list are mandatory; the scaler may be None and is
    # then handled by the predictor.
    if args.model is None or args.feature_list is None:
        print("🔴 ERROR: Critical configurations (model path or feature list) are not set. Cannot proceed.")
        return

    model_file_present = os.path.exists(args.model)
    if not model_file_present:
        print(f"🔴 ERROR: Model file not found at the specified path: {args.model}")
        return

    if args.scaler:
        if not os.path.exists(args.scaler):
            print(f"🔶 WARNING: Specified Scaler file not found at {args.scaler}. "
                  "Predictor will use a new, unfitted scaler (suboptimal).")

    orchestrator = IDSOrchestrator(
        model_path_from_config=args.model,
        feature_columns_from_config=args.feature_list,
        scaler_path_from_config=args.scaler
    )

    # Refuse to start unless the simulator, the predictor and its model are present, and
    # the scaler too whenever a scaler path was configured.
    predictor = orchestrator.predictor
    if (
        not orchestrator.simulator
        or not predictor
        or not predictor.model
        or (args.scaler and not predictor.scaler)
    ):
        print("🔴 ERROR: Orchestrator components (simulator, predictor, model, or expected scaler) "
              "failed to initialize properly. Cannot start.")
        return

    try:
        print("\n--- Starting IDS System Operation (via main function) ---")
        orchestrator.start(
            simulation_duration_seconds=args.duration,
            data_interval_seconds=args.interval,
            allow_random_attacks=(not args.no_random_attacks),
            start_dashboard=args.start_dashboard_process
        )

        if not args.duration:
            print("ℹ️ Simulation set to run continuously. Manually interrupt cell to stop.")
            # Idle until the orchestrator's running flag drops or the cell is interrupted.
            while orchestrator.running:
                time.sleep(1)
        else:
            print(f"\n⏳ Simulation will run for {args.duration} seconds. Monitoring...")
            # The simulation runs in a thread owned by the orchestrator; this thread simply
            # waits for the duration plus a buffer for logs and shutdown.
            wait_seconds = args.duration + 5
            time.sleep(wait_seconds)
            print("ℹ️ Main function wait period for simulation duration complete.")

    except KeyboardInterrupt:
        print("\n🚨 KeyboardInterrupt received! Initiating graceful shutdown of IDS system...")
    except Exception as e:
        print(f"❌ An unexpected error occurred in main execution: {e}")
        import traceback
        traceback.print_exc() # Full traceback for anything unexpected
    finally:
        print("\n--- Shutting Down IDS System (via main function) ---")
        if orchestrator:
            orchestrator.stop() # Make sure every component is stopped

            if args.save_results_at_end:
                print("\n--- Saving Final Session Results (via main function) ---")
                run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                orchestrator.save_session_results(f"ids_main_run_{run_stamp}")

    print("\n🏁 IDS System Main Execution Finished.")

# Entry point: invoke main() when the cell runs in Colab or the file runs as a script.
if (__name__ == "__main__") or ('google.colab' in sys.modules):
    print("\nRunning main() function for a demonstration of the IDS System...")
    # Every name below must already exist in the global namespace (created by the
    # earlier sections) before main() can be called safely.
    essential_globals = ['YOUR_MODEL_PATH', 'YOUR_SCALER_PATH', 'FEATURE_NAMES_FOR_MODEL',
                         'BASE_DIR', 'DATA_DIR', 'LOG_DIR',
                         'IDSSimulator', 'IDSModelPredictor', 'IDSOrchestrator',
                         'Sensor', 'NetworkDevice', 'SENSOR_CONFIG', 'ATTACK_TYPES']
    all_defined = True
    for var_name in essential_globals:
        if var_name in globals():
            continue
        # Report the first missing name only, then stop scanning.
        print(f"🔴 CRITICAL ERROR for main(): Global variable or class '{var_name}' is not defined. "
              "Please ensure all previous code sections (1-6) have been run successfully in this session.")
        all_defined = False
        break

    if not all_defined:
        print("\n❌ Main function execution aborted due to missing definitions from previous sections.")
    else:
        main()
else:
    print("Script loaded as a module, not running main() automatically.")

print("\n✅ Section 7 (Main Execution Block) is ready.")




Running main() function for a demonstration of the IDS System...
--- Initializing IDS System (via main function) ---
IDSSimulator initialized 7 sensors.
IDSSimulator initialized 5 network devices.
✅ Model successfully loaded from /content/drive/MyDrive/Colab Notebooks/results/lstm_model.h5
✅ Scaler successfully loaded from /content/drive/MyDrive/Colab Notebooks/results/scaler.pkl
✅ IDSOrchestrator initialized.

--- Starting IDS System Operation (via main function) ---

🚀 Starting IDS Orchestration... (Duration: 20, Data Interval: 1.0s)
Simulation run starting. Duration: 20. Interval: 1.0s. Random Attacks: True
Next potential random attack initiation around: 2025-05-28 23:47:26
Real-time logs will be written to: /content/drive/MyDrive/IDS_AI_Project/logs/ids_runtime_log_20250528_234658.jsonl
🚀 Simulation thread started. Random attacks: True. Interval: 1.0s.
🤖 Prediction thread started. Checking simulator data every 1.0s.
✅ IDS Orchestration components (Simulator & Predictor Threads) in