In [25]:
import pandas as pd
import matplotlib.pyplot as plt
import json
import ast


In [26]:
# Open the raw simulation log in text/read mode and keep the handle in `f`.
# NOTE(review): no visible cell reads from or closes `f`; confirm whether a
# later cell still needs this handle.
f = open("simulation_metrics.log", mode="r")

In [27]:
def pad_arrays_in_dict(dictionary, length):
    """Right-pad every list value of ``dictionary`` with ``None`` up to ``length``.

    The dictionary is modified in place: each list value is replaced by a new
    list made of the original items followed by ``None`` fillers. Lists that
    already hold ``length`` or more items keep their contents unchanged (a
    non-positive filler count yields no fillers). Values that are not lists
    are left untouched.

    Args:
        dictionary: Mapping whose list values should be padded.
        length: Target number of items for each list value.

    Returns:
        The same ``dictionary`` object that was passed in.
    """
    # Build all replacements first, then write them back in one step.
    padded_lists = {
        name: entry + [None] * (length - len(entry))
        for name, entry in dictionary.items()
        if isinstance(entry, list)
    }
    dictionary.update(padded_lists)
    return dictionary

In [31]:
import csv
import re
import numpy as np

def _count_values_per_bin(values, bin_edges):
    """Count how many entries of ``values`` fall into each bin.

    Bins are right-closed, ``(edge[i], edge[i + 1]]``, except that the first
    bin also includes its left edge, so a value equal to the range minimum is
    counted instead of being dropped. Missing values and values outside the
    overall range are ignored.

    Args:
        values: pandas Series of numeric values (may contain NaN/None).
        bin_edges: 1-D increasing array of bin edges.

    Returns:
        Float numpy array of length ``len(bin_edges) - 1`` with per-bin counts.
    """
    n_bins = len(bin_edges) - 1
    present = values.dropna()
    if present.empty:
        return np.zeros(n_bins)
    # labels=False yields the integer position of the bin; out-of-range
    # values come back as NaN and are discarded below.
    positions = pd.cut(present, bin_edges, labels=False, include_lowest=True)
    in_range = positions.dropna().astype(int).to_numpy()
    # Keep float counts so the CSV cells look the same as before ("3.0").
    return np.bincount(in_range, minlength=n_bins).astype(float)


def process_data_to_csv(input_file_path, output_file_path,
                        range_min=0, range_max=10000, interval=500):
    """Summarise a simulation metrics log as one CSV row per STATS block.

    The log is split on the marker ``STATS BEFORE MTD OPERATION``. Within
    each block (after stripping surrounding whitespace) the parser expects:

    * line 0: ``Current Attack: ...``
    * line 1: ``MTD Frequency: ...``
    * line 2: ``Compromised Number: ...``
    * line 3: ``MTD: <word>``
    * line 7: ``Vulnerabilities Exploited: {...}`` - a Python dict literal
      whose list values are read as columns ``x``, ``roa``, ``impact``,
      ``complexity`` and ``total had os dependency``.

    Each output row holds the parsed header fields, whether the text
    ``Interrupted`` occurs anywhere in the block, the column means of
    ``roa``/``impact``/``complexity``/``total had os dependency``, and a
    histogram of the ``x`` values over ``[range_min, range_max]`` in steps
    of ``interval``. There is one ``x_<lower edge>`` header column per bin,
    so header and data rows always have the same width.

    Args:
        input_file_path: Path of the log file to read.
        output_file_path: Path of the CSV file to write (overwritten).
        range_min: Lower edge of the first histogram bin.
        range_max: Upper edge of the histogram range.
        interval: Width of each histogram bin.

    Raises:
        AttributeError: If one of the expected lines does not match its
            pattern (the regex search returns ``None``).
        IndexError: If a block has fewer lines than expected.
        KeyError: If a non-empty exploit dict lacks an expected column.
    """
    with open(input_file_path, 'r') as input_file:
        data = input_file.read()

    # Bin edges are loop-invariant: compute them once and derive both the
    # header columns and the per-row histogram width from the same array.
    bin_edges = np.arange(range_min, range_max + interval, interval)
    n_bins = len(bin_edges) - 1
    x_columns = [f"x_{edge}" for edge in bin_edges[:-1].tolist()]

    csv_header = ["curr_attack", "mtd_freq", "compromised_num", "interrupted", "mtd", "roa", "impact", "complexity", "total had os dependency"] + x_columns

    with open(output_file_path, mode='w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(csv_header)

        # Everything before the first marker is preamble and is skipped.
        blocks = data.split("STATS BEFORE MTD OPERATION")[1:]
        for block in blocks:
            lines = block.strip().split("\n")
            curr_attack = re.search(r"Current Attack: (.+)", lines[0]).group(1)
            mtd_freq = re.search(r"MTD Frequency: (.+)", lines[1]).group(1)
            compromised_num = re.search(r"Compromised Number: (.+)", lines[2]).group(1)
            interrupted = "Interrupted" in block
            mtd = re.search(r"MTD: (\w+)", lines[3]).group(1)

            # The exploit lists can differ in length; pad them with None so
            # they can be loaded as DataFrame columns.
            vuln_exploit_dict = ast.literal_eval(re.search(r"Vulnerabilities Exploited: ({.*})", lines[7]).group(1))
            # default=0 keeps an exploit dict without any list (e.g. "{}")
            # from raising, so it reaches the empty-frame branch below.
            max_length = max(
                (len(arr) for arr in vuln_exploit_dict.values() if isinstance(arr, list)),
                default=0,
            )
            padded_dict = pad_arrays_in_dict(vuln_exploit_dict, max_length)
            vuln_exploit = pd.DataFrame(padded_dict)

            if not vuln_exploit.empty:
                padded_values = _count_values_per_bin(vuln_exploit['x'], bin_edges)
                # mean() skips the NaN entries introduced by the padding.
                roa = vuln_exploit['roa'].mean()
                impact = vuln_exploit['impact'].mean()
                complexity = vuln_exploit['complexity'].mean()
                total_os_dependency = vuln_exploit['total had os dependency'].mean()
            else:
                roa = 0
                impact = 0
                # NOTE(review): 9 is inconsistent with the other zero
                # defaults and may be a typo for 0; kept unchanged until the
                # intended value is confirmed.
                complexity = 9
                total_os_dependency = 0
                padded_values = [0] * n_bins

            row_values = [curr_attack, mtd_freq, compromised_num, interrupted, mtd, roa, impact, complexity, total_os_dependency] + list(padded_values)
            csv_writer.writerow(row_values)
# Example run: convert the simulation log into its CSV summary. This executes
# as soon as the cell is run and overwrites the output file.
input_file_path, output_file_path = 'simulation_metrics.log', 'simulation_metrics.csv'
process_data_to_csv(
    input_file_path=input_file_path,
    output_file_path=output_file_path,
)
