In [1]:
import os
import re
import pandas as pd

# Define the root directory where experiment folders are located
root_dir = "../Experiments/"  # Change this to the correct path
output_csv = "aggregated_experiment_data.csv"

# Regular expression patterns to extract required details
k1_k2_pattern = re.compile(r"K1_Allocation:\s*(\w+), Clustering: \w+, K1: (\d+), K2: (\d+)")
cluster_details_pattern = re.compile(r"Cluster (\d+): Avg Energy: ([\d.e-]+), Avg Time: ([\d.e-]+), LOCAL_ITERATIONS: (\d+)")
reversed_clients_pattern = re.compile(r"Cluster (\d+) assigned to Edge Server \d+ with (\d+) clients.")
uniform_clients_pattern = re.compile(r"Edge Server \d+ assigned to Cluster (\d+) with (\d+) clients.")
# NOTE: this pattern has a single capture group (the cluster id), so it can never
# yield a client count; non-reversed logs therefore rely on the default below.
non_reversed_clients_pattern = re.compile(r"Cluster (\d+): Avg Energy: [\d.e-]+, Avg Time: [\d.e-]+, LOCAL_ITERATIONS: \d+")

# Client count assigned to every cluster of a non-reversed log when no count
# could be read from the log itself (default estimate).
DEFAULT_NON_REVERSED_CLIENTS = 5


def parse_cluster_log(content):
    """Extract the allocation header and per-cluster details from one log file.

    Parameters
    ----------
    content : iterable of str
        The lines of a single log file.

    Returns
    -------
    tuple
        ``(k1_allocation_label, k1_value, k2_value, cluster_ids, avg_energy,
        local_iterations, num_clients_list)``. The three header fields are
        ``None`` when the log has no ``K1_Allocation`` line; the four lists are
        aligned with each other and ordered by ascending cluster id.
    """
    # Every header field starts as None on each call, so a log without a
    # K1_Allocation line can neither raise NameError nor inherit the label of
    # a previously parsed log.
    k1_allocation_label = None
    k1_value = None
    k2_value = None

    # If several header lines are present, the last one wins.
    for line in content:
        k1_k2_match = k1_k2_pattern.search(line)
        if not k1_k2_match:
            continue
        k1_allocation = k1_k2_match.group(1).strip().lower()
        k1_value = int(k1_k2_match.group(2))
        k2_value = int(k1_k2_match.group(3))
        # Anything other than "reversed" or "uniform" is labelled "non-reversed".
        if k1_allocation in ("reversed", "uniform"):
            k1_allocation_label = k1_allocation
        else:
            k1_allocation_label = "non-reversed"

    # Per-cluster energy / iteration details; a later line for the same
    # cluster id overwrites an earlier one.
    cluster_info = {}
    for line in content:
        cluster_match = cluster_details_pattern.search(line)
        if cluster_match:
            cluster_id = int(cluster_match.group(1))
            avg_energy_value = float(cluster_match.group(2))
            local_iteration_value = int(cluster_match.group(4))
            cluster_info[cluster_id] = (avg_energy_value, local_iteration_value)

    cluster_ids = sorted(cluster_info)
    avg_energy = [cluster_info[cid][0] for cid in cluster_ids]
    local_iterations = [cluster_info[cid][1] for cid in cluster_ids]

    # Pick the line format that carries the client counts for this allocation.
    if k1_allocation_label == "reversed":
        client_pattern = reversed_clients_pattern
    elif k1_allocation_label == "uniform":
        client_pattern = uniform_clients_pattern
    else:
        client_pattern = non_reversed_clients_pattern

    num_clients = dict.fromkeys(cluster_ids, 0)
    for line in content:
        client_match = client_pattern.search(line)
        if not client_match:
            continue
        cluster_id = int(client_match.group(1))
        if cluster_id in num_clients:
            # Patterns without a second capture group carry no count.
            has_count = len(client_match.groups()) > 1
            num_clients[cluster_id] = int(client_match.group(2)) if has_count else 0

    # Fallback: non-reversed logs with no usable counts get the default estimate.
    if k1_allocation_label == "non-reversed" and all(v == 0 for v in num_clients.values()):
        num_clients = dict.fromkeys(cluster_ids, DEFAULT_NON_REVERSED_CLIENTS)

    num_clients_list = [num_clients.get(cid, 0) for cid in cluster_ids]

    return (
        k1_allocation_label,
        k1_value,
        k2_value,
        cluster_ids,
        avg_energy,
        local_iterations,
        num_clients_list,
    )


# List to store extracted data (one row per log file)
data = []

# Iterate through each experiment folder
for exp_num in range(27, 36):  # From Experiment_27 to Experiment_35
    exp_folder = f"Experiment_{exp_num}"
    exp_path = os.path.join(root_dir, exp_folder)

    if not os.path.isdir(exp_path):
        continue  # Skip if folder does not exist

    # Iterate through all log files in the folder
    for log_file in os.listdir(exp_path):
        if log_file.startswith("."):  # Skip hidden/system files
            continue

        log_path = os.path.join(exp_path, log_file)
        if not os.path.isfile(log_path):
            continue

        with open(log_path, "r") as file:
            content = file.readlines()

        parsed = parse_cluster_log(content)
        if parsed[0] is None:
            # Keep the row, but make the missing header visible instead of
            # silently mislabelling the file.
            print(f"⚠️ No K1_Allocation header found in {log_path}; k1 fields left empty.")

        # Row layout matches the DataFrame columns below.
        data.append([exp_num, log_file, *parsed])

# Convert to DataFrame
df = pd.DataFrame(data, columns=[
    "experiment_id",
    "log_file",
    "k1_allocation",  # Corrected label (reversed, non-reversed, uniform)
    "k1_value",
    "k2_value",
    "cluster_ids",
    "average_energy_for_clusters",
    "local_iterations_for_clusters",
    "number_of_clients_in_each_cluster"
])

# Save to CSV
df.to_csv(output_csv, index=False)

print(f"✅ Data aggregation complete. Saved to {output_csv}.")


✅ Data aggregation complete. Saved to aggregated_experiment_data.csv.


In [3]:
import ast

# CSV written by the aggregation step; every column is read back as text so
# the list-valued columns stay as their literal string form.
input_csv = "aggregated_experiment_data.csv"
df = pd.read_csv(input_csv, dtype=str)

# Names of the four list-valued columns that are exploded below
cluster_ids_col = 'cluster_ids'
energy_col = 'average_energy_for_clusters'
iterations_col = 'local_iterations_for_clusters'
clients_col = 'number_of_clients_in_each_cluster'

# One dict per (log file, cluster) pair
expanded_data = []

for _, row in df.iterrows():
    # Turn each stringified list back into a real Python list.
    cluster_ids, energies, iterations, clients = (
        ast.literal_eval(row[column])
        for column in (cluster_ids_col, energy_col, iterations_col, clients_col)
    )

    # zip() pairs the lists position by position and stops at the shortest one.
    expanded_data.extend([
        {
            'experiment_id': row['experiment_id'],
            'k1_allocation': row['k1_allocation'],
            'cluster_id': cluster_id,
            'average_energy_for_clusters': energy,
            'local_iterations_for_clusters': iteration,
            'number_of_clients_in_each_cluster': client_count,
        }
        for cluster_id, energy, iteration, client_count
        in zip(cluster_ids, energies, iterations, clients)
    ])

df_expanded = pd.DataFrame(expanded_data)
df_expanded.to_csv("cleaned_experiment_data.csv", index=False)

print(df_expanded.head())

  experiment_id k1_allocation  cluster_id  average_energy_for_clusters  \
0            27      reversed           0                     0.000035   
1            27      reversed           1                     0.000012   
2            27      reversed           2                     0.000021   
3            27      reversed           3                     0.000028   
4            27      reversed           4                     0.000017   

   local_iterations_for_clusters  number_of_clients_in_each_cluster  
0                              3                                 11  
1                              6                                 10  
2                              6                                  5  
3                              5                                 14  
4                              6                                 10  


In [25]:
import os
import re
import pandas as pd

# Where the Experiment_<N> folders live, and the CSV produced by this cell
root_dir = "../Experiments/"  # Change this to the correct path
output_csv = "summary_experiment_data.csv"

# Matches one summary line. Capture groups, in order: loss, accuracy,
# training time, energy computation, energy communication, total energy,
# number of communications.
summary_pattern = re.compile(
    r"Loss:\s*([\d.e-]+)\s+Accuracy:\s*([\d.e-]+)\s+Training time:\s*([\d.e-]+)s\s+"
    r"Energy Computation:\s*([\d.e-]+)\s+Energy Communication:\s*([\d.e-]+)\s+"
    r"Total Energy:\s*([\d.e-]+)\s+Number of communications:\s*(\d+)"
)

# Accumulates one row per matched summary line
data = []

# Walk Experiment_27 .. Experiment_35
for exp_num in range(27, 36):
    exp_folder = f"Experiment_{exp_num}"
    exp_path = os.path.join(root_dir, exp_folder)

    # Only existing experiment folders are scanned.
    if os.path.isdir(exp_path):
        for log_file in os.listdir(exp_path):
            # Hidden/system entries are ignored.
            if log_file.startswith("."):
                continue

            log_path = os.path.join(exp_path, log_file)
            if not os.path.isfile(log_path):
                continue

            # Running count of summary lines seen in this file (1-based)
            index = 0
            with open(log_path, "r") as file:
                content = file.readlines()

            for line in content:
                summary_match = summary_pattern.search(line)
                if summary_match is None:
                    continue

                index += 1
                captured = summary_match.groups()
                # First six groups are floating-point metrics, the last is a count.
                (loss, accuracy, training_time, energy_computation,
                 energy_communication, total_energy) = map(float, captured[:6])
                num_communications = int(captured[6])

                data.append([
                    exp_num, log_file, index,
                    loss, accuracy, training_time,
                    energy_computation, energy_communication, total_energy,
                    num_communications,
                ])

# Build the table; column order mirrors the row layout above.
summary_columns = [
    "experiment_id",
    "log_file",
    "global_round",
    "loss",
    "accuracy",
    "training_time",
    "energy_computation",
    "energy_communication",
    "total_energy",
    "num_communications"
]
df = pd.DataFrame(data, columns=summary_columns)

# Persist the result
df.to_csv(output_csv, index=False)

print(f"✅ Data aggregation complete. Saved to {output_csv}.")


✅ Data aggregation complete. Saved to summary_experiment_data.csv.


In [26]:
import os
import re
import pandas as pd

# Define the root directory where experiment folders are located
root_dir = "../Experiments/"  # Change this to the correct path
output_csv = "final_experiment_summary.csv"

# Global round whose summary line is kept for each log file
TARGET_ROUND = 25

# Regular expression pattern to extract required details.
# Capture groups, in order: loss, accuracy, training time, energy computation,
# energy communication, total energy, number of communications.
summary_pattern = re.compile(
    r"Loss:\s*([\d.e-]+)\s+Accuracy:\s*([\d.e-]+)\s+Training time:\s*([\d.e-]+)s\s+"
    r"Energy Computation:\s*([\d.e-]+)\s+Energy Communication:\s*([\d.e-]+)\s+"
    r"Total Energy:\s*([\d.e-]+)\s+Number of communications:\s*(\d+)"
)


def extract_round_summary(content, target_round=TARGET_ROUND):
    """Return the metrics of the ``target_round``-th summary line of a log.

    Parameters
    ----------
    content : iterable of str
        The lines of a single log file.
    target_round : int, optional
        1-based position of the summary line to extract (default ``TARGET_ROUND``).

    Returns
    -------
    list or None
        ``[loss, accuracy, training_time, energy_computation,
        energy_communication, total_energy, num_communications]`` (six floats
        followed by one int), or ``None`` when the log contains fewer than
        ``target_round`` summary lines.
    """
    global_round = 0  # Number of summary lines seen so far
    for line in content:
        summary_match = summary_pattern.search(line)
        if summary_match is None:
            continue
        global_round += 1
        if global_round == target_round:
            *metrics, num_communications = summary_match.groups()
            # Stop at the requested round; later lines are not inspected.
            return [float(value) for value in metrics] + [int(num_communications)]
    return None


# List to store extracted data
data = []
# Log files that ended before TARGET_ROUND (reported after the scan)
incomplete_logs = []

# Iterate through each experiment folder
for exp_num in range(27, 36):  # From Experiment_27 to Experiment_35
    exp_folder = f"Experiment_{exp_num}"
    exp_path = os.path.join(root_dir, exp_folder)

    if not os.path.isdir(exp_path):
        continue  # Skip if folder does not exist

    # Iterate through all log files in the folder
    for log_file in os.listdir(exp_path):
        if log_file.startswith("."):  # Skip hidden/system files
            continue

        log_path = os.path.join(exp_path, log_file)
        if not os.path.isfile(log_path):
            continue

        # Read log file contents
        with open(log_path, "r") as file:
            content = file.readlines()

        round_summary = extract_round_summary(content, TARGET_ROUND)
        if round_summary is None:
            # No row is produced for this file; remember it so the omission
            # is visible instead of silent.
            incomplete_logs.append(log_path)
            continue

        data.append([exp_num, log_file, *round_summary])

# Convert to DataFrame
df = pd.DataFrame(data, columns=[
    "experiment_id",
    "log_file",
    "loss",
    "accuracy",
    "training_time",
    "energy_computation",
    "energy_communication",
    "total_energy",
    "num_communications"
])

# Save to CSV
df.to_csv(output_csv, index=False)

if incomplete_logs:
    print(
        f"⚠️ {len(incomplete_logs)} log file(s) had fewer than {TARGET_ROUND} "
        f"summary lines and were skipped: {incomplete_logs}"
    )

print(f"✅ Data extraction complete. Saved to {output_csv}.")


✅ Data extraction complete. Saved to final_experiment_summary.csv.
