In [None]:

"""
Module: system_monitor.py
Purpose: Combined System Monitoring & Data Acquisition and Data Preprocessing & Aggregation
         for the Adaptive Resource Allocation System.
         
This module performs the following:
1. Data Acquisition: Collects raw system metrics (CPU, memory, disk I/O, network, and process details) using the psutil library.
2. Data Preprocessing & Aggregation: Uses a fixed-size sliding window to compute statistical aggregates (average, minimum, maximum, and standard deviation)
   for key metrics (CPU and memory usage) and outputs the aggregated results in JSON format along with a timestamp.
"""

import psutil       # Library for system monitoring
import json         # For serializing data into JSON format
import time         # For time-related functions
import collections  # Provides deque for fixed-size sliding windows
import math         # For computing standard deviation

# ---------------------------
# Data Acquisition Functions
# ---------------------------
def collect_cpu_metrics():
    """
    Collects CPU-related metrics.

    The overall and per-core percentages are both measured across a single
    0.5-second window, so the two figures describe the same period of
    activity and the call blocks for roughly 0.5 seconds in total.

    Returns:
        dict: Contains overall CPU usage percentage ("overall_cpu_percent"),
              per-core usage ("per_cpu_percent"), and per-core CPU
              frequencies ("cpu_freq", or None if they cannot be read).
    """
    sample_window = 0.5  # seconds

    # Prime psutil's counters. With interval=None the call returns
    # immediately and measures "since the previous call", so these first
    # results are discarded and only serve as the start of the window.
    psutil.cpu_percent(interval=None)
    psutil.cpu_percent(interval=None, percpu=True)

    time.sleep(sample_window)

    # Both readings now cover the time elapsed since the priming calls.
    overall_cpu = psutil.cpu_percent(interval=None)
    per_cpu = psutil.cpu_percent(interval=None, percpu=True)

    cpu_data = {
        "overall_cpu_percent": overall_cpu,
        "per_cpu_percent": per_cpu
    }

    # Frequency reporting is best-effort: any failure (unsupported platform,
    # missing permissions, nothing to iterate) is reported as None rather
    # than aborting the whole collection.
    try:
        cpu_data["cpu_freq"] = [freq.current for freq in psutil.cpu_freq(percpu=True)]
    except Exception:
        cpu_data["cpu_freq"] = None

    return cpu_data

def collect_memory_metrics():
    """
    Gathers a snapshot of virtual memory and swap usage.

    Returns:
        dict: Keys "total_memory", "used_memory", "available_memory" and
              "memory_percent" come from psutil.virtual_memory();
              "swap_total", "swap_used" and "swap_percent" come from
              psutil.swap_memory().
    """
    virtual = psutil.virtual_memory()
    swap_info = psutil.swap_memory()

    snapshot = {}

    # Virtual (main) memory figures
    snapshot["total_memory"] = virtual.total
    snapshot["used_memory"] = virtual.used
    snapshot["available_memory"] = virtual.available
    snapshot["memory_percent"] = virtual.percent

    # Swap figures
    snapshot["swap_total"] = swap_info.total
    snapshot["swap_used"] = swap_info.used
    snapshot["swap_percent"] = swap_info.percent

    return snapshot

def collect_disk_metrics():
    """
    Collects disk I/O metrics.

    Returns:
        dict: Contains disk read/write operation counts ("read_count",
              "write_count") and byte totals ("read_bytes", "write_bytes").
              Every value is None when psutil reports no disk counters
              (for example on a diskless machine), mirroring how an
              unavailable "cpu_freq" is reported.
    """
    fields = ("read_count", "write_count", "read_bytes", "write_bytes")

    disk_io = psutil.disk_io_counters()
    if disk_io is None:
        # No counters available: keep the dictionary shape stable instead
        # of failing with AttributeError on the attribute lookups below.
        return dict.fromkeys(fields)

    return {name: getattr(disk_io, name) for name in fields}

def collect_network_metrics():
    """
    Collects network I/O metrics.

    Returns:
        dict: Contains bytes sent/received ("bytes_sent", "bytes_recv"),
              packet counts ("packets_sent", "packets_recv"), and error/drop
              statistics ("errin", "errout", "dropin", "dropout").
              Every value is None when psutil reports no network counters,
              mirroring how an unavailable "cpu_freq" is reported.
    """
    fields = (
        "bytes_sent",
        "bytes_recv",
        "packets_sent",
        "packets_recv",
        "errin",
        "errout",
        "dropin",
        "dropout",
    )

    net_io = psutil.net_io_counters()
    if net_io is None:
        # No counters available: keep the dictionary shape stable instead
        # of failing with AttributeError on the attribute lookups below.
        return dict.fromkeys(fields)

    return {name: getattr(net_io, name) for name in fields}

def collect_process_metrics(top_n=5):
    """
    Collects metrics for running processes.

    Returns the top 'top_n' processes sorted by memory usage (descending).
    Processes whose memory usage could not be read are ranked as if they
    used 0% memory, so they sort last instead of breaking the comparison.

    Parameters:
        top_n (int): Number of processes to return (default is 5).

    Returns:
        list: A list of dictionaries, each containing process details (PID, name, CPU and memory usage, and I/O counters).
              Individual values may be None when psutil could not read them.
    """
    attrs = ['pid', 'name', 'cpu_percent', 'memory_percent', 'io_counters']

    processes = []
    for proc in psutil.process_iter(attrs):
        try:
            processes.append(proc.info)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue

    # The "memory_percent" key is present even when the value could not be
    # read; in that case it holds None, which cannot be ordered against
    # floats. `or 0.0` maps both a missing key and a None value to 0.0.
    processes.sort(key=lambda p: p.get("memory_percent") or 0.0, reverse=True)
    return processes[:top_n]

def collect_all_metrics():
    """
    Builds one combined snapshot of every raw metric group.

    The timestamp (UTC, ISO-8601 style with a trailing "Z") is taken first,
    before any collector runs; the collectors are then invoked in the order
    CPU, memory, disk, network, processes.

    Returns:
        dict: Keys "timestamp", "cpu", "memory", "disk", "network" and
              "processes", holding the results of the matching collectors.
    """
    snapshot = {
        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    }

    # Each collector fills in its own section of the snapshot.
    snapshot["cpu"] = collect_cpu_metrics()
    snapshot["memory"] = collect_memory_metrics()
    snapshot["disk"] = collect_disk_metrics()
    snapshot["network"] = collect_network_metrics()
    snapshot["processes"] = collect_process_metrics()

    return snapshot

# ---------------------------------------
# Data Preprocessing & Aggregation Class
# ---------------------------------------
class MetricAggregator:
    """
    Aggregates raw system metrics using a fixed-size sliding window.
    Computes statistical measures: average, minimum, maximum, and standard deviation.

    Two metrics are tracked: overall CPU usage ("cpu_percent") and memory
    usage ("memory_percent"). Each keeps only the most recent `window_size`
    samples; older samples are discarded automatically.
    """

    def __init__(self, window_size=5):
        """
        Initializes the MetricAggregator with a fixed-size sliding window.

        Parameters:
            window_size (int): The number of recent samples to aggregate.
        """
        self.window_size = window_size
        # Create deques for CPU and memory percentages with fixed window size;
        # a full deque drops its oldest entry when a new one is appended.
        self.metrics_windows = {
            "cpu_percent": collections.deque(maxlen=window_size),
            "memory_percent": collections.deque(maxlen=window_size)
        }

    def add_raw_metrics(self, raw_metrics):
        """
        Adds raw metric values to the sliding windows.

        Both values are extracted before either window is modified, so a
        rejected sample never leaves the windows partially updated.

        Parameters:
            raw_metrics (dict): Must include nested keys:
                - "cpu": {"overall_cpu_percent": float, ...}
                - "memory": {"memory_percent": float, ...}

        Raises:
            ValueError: If the required keys are missing, or if
                `raw_metrics` or one of its sections is not a mapping.
        """
        try:
            # Adjust to the nested structure from collect_all_metrics()
            cpu = raw_metrics["cpu"]["overall_cpu_percent"]
            memory = raw_metrics["memory"]["memory_percent"]
        except KeyError as e:
            raise ValueError("Missing required metric key: " + str(e)) from e
        except TypeError as e:
            # A section that is None (or otherwise not subscriptable) is a
            # malformed sample too; report it through the documented
            # ValueError instead of leaking a TypeError to the caller.
            raise ValueError("Malformed raw metrics structure: " + str(e)) from e

        # Append the values into their respective sliding windows
        self.metrics_windows["cpu_percent"].append(cpu)
        self.metrics_windows["memory_percent"].append(memory)

    def _compute_statistics(self, data_window):
        """
        Computes statistics (average, min, max, standard deviation) for a data window.

        The standard deviation is the population form (divides by n, not
        n - 1). An empty window yields zeros for every statistic.

        Parameters:
            data_window (deque): A collection of numerical values.

        Returns:
            dict: Contains keys "avg", "min", "max", and "std".
        """
        n = len(data_window)
        if n == 0:
            return {"avg": 0, "min": 0, "max": 0, "std": 0}
        avg_val = sum(data_window) / n
        min_val = min(data_window)
        max_val = max(data_window)
        std_val = math.sqrt(sum((x - avg_val) ** 2 for x in data_window) / n)
        return {"avg": avg_val, "min": min_val, "max": max_val, "std": std_val}

    def get_aggregated_metrics(self):
        """
        Aggregates metrics from the sliding windows and adds a timestamp.

        Returns:
            dict: Aggregated statistics for each key metric, plus a
                  "timestamp" entry holding the current UTC time.
        """
        aggregated = {
            key: self._compute_statistics(window)
            for key, window in self.metrics_windows.items()
        }
        aggregated["timestamp"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        return aggregated

    def get_aggregated_metrics_json(self):
        """
        Returns the aggregated metrics in JSON format.

        Returns:
            str: JSON-formatted aggregated metrics (indented by 4 spaces).
        """
        aggregated = self.get_aggregated_metrics()
        return json.dumps(aggregated, indent=4)

# ---------------------------
# Main Execution Flow
# ---------------------------
def main(sampling_interval=3, window_size=5):
    """
    Main function that combines data acquisition and aggregation.
    - Collects raw system metrics.
    - Feeds them into the aggregator.
    - Outputs both raw and aggregated metrics in JSON format.

    The loop runs until interrupted with Ctrl+C (KeyboardInterrupt).

    Parameters:
        sampling_interval (float): Seconds to wait between two samples
            (default is 3). The time spent collecting a sample is not
            included, so the real period is slightly longer.
        window_size (int): Number of recent samples kept by the aggregator
            (default is 5).
    """
    # Create an instance of MetricAggregator with the requested sliding window
    aggregator = MetricAggregator(window_size=window_size)

    try:
        while True:
            # Collect raw system metrics
            raw_metrics = collect_all_metrics()
            # Print raw metrics for debugging/verification
            print("Raw Metrics:")
            print(json.dumps(raw_metrics, indent=4))

            # Add the collected raw metrics to the aggregator
            aggregator.add_raw_metrics(raw_metrics)

            # Retrieve the aggregated metrics in JSON format
            aggregated_json = aggregator.get_aggregated_metrics_json()
            print("\nAggregated Metrics (JSON):")
            print(aggregated_json)

            # Wait for the next sampling interval
            time.sleep(sampling_interval)
    except KeyboardInterrupt:
        print("\nMonitoring stopped by user.")

# Start the monitoring loop only when this file is executed directly,
# not when it is imported by another module.
if __name__ == "__main__":
    main()


Raw Metrics:
{
    "timestamp": "2025-03-23T11:06:17Z",
    "cpu": {
        "overall_cpu_percent": 1.1,
        "per_cpu_percent": [
            15.2,
            3.0,
            6.2,
            3.1,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0
        ],
        "cpu_freq": [
            2100.0
        ]
    },
    "memory": {
        "total_memory": 16456368128,
        "used_memory": 8905510912,
        "available_memory": 7550857216,
        "memory_percent": 54.1,
        "swap_total": 2550136832,
        "swap_used": 2529255424,
        "swap_percent": 99.2
    },
    "disk": {
        "read_count": 1361987,
        "write_count": 1646533,
        "read_bytes": 36428710912,
        "write_bytes": 42351164928
    },
    "network": {
        "bytes_sent": 7822295,
        "bytes_recv": 62664622,
        "packets_sent": 29282,
        "packets_recv": 59242,
        "errin": 0,
        "errou