In [59]:
import pandas as pd
import matplotlib.pyplot as plt
import json
import ast


In [60]:
# NOTE(review): this handle is never read or closed in the cells shown here;
# process_data_to_csv opens the same log on its own. Confirm no later cell
# relies on `f` before removing it or wrapping it in a `with` block.
f = open("simulation_metrics.log", "r")

In [61]:
def pad_arrays_in_dict(dictionary, length):
    """Right-pad every list value of ``dictionary`` with ``None`` up to ``length``.

    The dictionary is updated in place and also returned. Each padded entry is
    a new list object, so the lists originally stored in the dictionary are not
    modified. Lists that already hold ``length`` or more items keep their
    contents (nothing is truncated); non-list values are left untouched.
    """
    # Collect the affected keys first, then rebind each one to its padded copy.
    list_keys = [name for name, item in dictionary.items() if isinstance(item, list)]
    for name in list_keys:
        current = dictionary[name]
        missing = length - len(current)
        # A non-positive ``missing`` yields an empty filler, i.e. a plain copy.
        dictionary[name] = current + [None] * missing
    return dictionary

In [62]:
import csv
import re

def process_data_to_csv(input_file_path, output_file_path):
    """Summarise each stats block of a simulation log as one CSV row.

    The log text is split on the marker ``STATS BEFORE MTD OPERATION``; any
    text before the first marker is ignored. For every block, the attack
    name, MTD frequency, compromised number and MTD name are read from the
    block's first four lines, and the ``Vulnerabilities Exploited`` dictionary
    on its eighth line is reduced to the per-column means of ``roa``,
    ``impact``, ``complexity`` and ``total had os dependency``. The
    ``interrupted`` column is ``True`` when the word ``Interrupted`` appears
    anywhere in the block.

    Args:
        input_file_path: Path of the log file to read.
        output_file_path: Path of the CSV file to create (overwritten).

    Raises:
        ValueError: If a block has too few lines or an expected field is not
            found on its line.
    """
    header = ["curr_attack", "mtd_freq", "compromised_num", "interrupted", "mtd",
              "roa", "impact", "complexity", "total had os dependency"]
    # The last four header names double as the DataFrame columns to average.
    metric_columns = header[5:]

    with open(input_file_path, 'r') as input_file:
        data = input_file.read()

    # Everything before the first marker is preamble, hence the [1:].
    blocks = data.split("STATS BEFORE MTD OPERATION")[1:]

    # Parse every block before touching the output file so that a malformed
    # log does not leave a truncated CSV behind.
    rows = []
    for block_number, block in enumerate(blocks, start=1):
        lines = block.strip().split("\n")
        curr_attack = _extract_field(r"Current Attack: (.+)", lines, 0, block_number)
        mtd_freq = _extract_field(r"MTD Frequency: (.+)", lines, 1, block_number)
        compromised_num = _extract_field(r"Compromised Number: (.+)", lines, 2, block_number)
        interrupted = "Interrupted" in block
        mtd = _extract_field(r"MTD: (\w+)", lines, 3, block_number)

        # The dictionary maps column names to lists that may differ in length;
        # pad them to a common length so they can form a DataFrame.
        vuln_exploit_dict = ast.literal_eval(
            _extract_field(r"Vulnerabilities Exploited: ({.*})", lines, 7, block_number)
        )
        max_length = max(
            (len(arr) for arr in vuln_exploit_dict.values() if isinstance(arr, list)),
            default=0,  # no list values at all (e.g. an empty dict)
        )
        vuln_exploit = pd.DataFrame(pad_arrays_in_dict(vuln_exploit_dict, max_length))

        if vuln_exploit.empty:
            # Nothing was exploited in this block: report 0 for every metric.
            metric_means = [0] * len(metric_columns)
        else:
            metric_means = [vuln_exploit[column].mean() for column in metric_columns]

        rows.append([curr_attack, mtd_freq, compromised_num, interrupted, mtd, *metric_means])

    with open(output_file_path, mode='w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(header)
        csv_writer.writerows(rows)


def _extract_field(pattern, lines, index, block_number):
    """Return group 1 of ``pattern`` searched in ``lines[index]``, or raise ValueError."""
    if index >= len(lines):
        raise ValueError(
            f"Stats block {block_number} has only {len(lines)} line(s); "
            f"expected line {index + 1} to match {pattern!r}"
        )
    match = re.search(pattern, lines[index])
    if match is None:
        raise ValueError(
            f"Stats block {block_number}, line {index + 1}: "
            f"{lines[index]!r} does not match {pattern!r}"
        )
    return match.group(1)

# Example usage: read the simulation log and write a one-row-per-stats-block
# CSV summary next to it. Both paths are relative, so they resolve against
# the current working directory.
input_file_path = 'simulation_metrics.log'
output_file_path = 'simulation_metrics.csv'
process_data_to_csv(input_file_path, output_file_path)
