In [3]:
import re
from pathlib import Path

import pandas as pd

In [4]:
# One "(round, value)" tuple as printed inside the logged lists. The value may
# be any float spelling Python emits: "0.5", "3", "1e-05", "-2.5E+3", "nan",
# "inf".
_ROUND_VALUE_RE = re.compile(
    r"\((\d+),\s*([-+]?(?:\d+(?:\.\d*)?(?:[eE][-+]?\d+)?|inf|nan))\)"
)
# Line carrying the centralized losses: "... losses_centralized [(r, v), ...]".
_LOSS_LINE_RE = re.compile(r"app_fit: losses_centralized \[(.+)\]")
# Line carrying the centralized accuracies. The capture stops at the first "]"
# so tuples belonging to any other metric listed after 'accuracy' are ignored.
_ACCURACY_LINE_RE = re.compile(
    r"app_fit: metrics_centralized \{'accuracy': \[([^\]]+)\]"
)


def extract_data(log_entry):
    """Extract per-round centralized loss and accuracy from one log entry.

    Scans every line of ``log_entry`` for an ``app_fit: losses_centralized``
    list and an ``app_fit: metrics_centralized {'accuracy': [...]}`` list and
    merges the ``(round, value)`` tuples they contain by round number.

    Args:
        log_entry: Text of a single run's log (may span many lines).

    Returns:
        A list of dicts, one per round, in order of first appearance. Each
        dict has a ``'round'`` key (int) plus ``'loss'`` and/or ``'accuracy'``
        (float) for whichever values were found for that round. If a round is
        reported more than once, the last value seen wins. Returns an empty
        list when no matching line is present.
    """
    # Keyed by round so loss and accuracy are merged in O(1) regardless of
    # which line comes first; dict insertion order keeps rounds in log order.
    records = {}
    fields = ((_LOSS_LINE_RE, 'loss'), (_ACCURACY_LINE_RE, 'accuracy'))

    for line in log_entry.splitlines():
        for line_re, field in fields:
            line_match = line_re.search(line)
            if not line_match:
                continue
            for round_str, value_str in _ROUND_VALUE_RE.findall(line_match.group(1)):
                round_num = int(round_str)
                record = records.setdefault(round_num, {'round': round_num})
                record[field] = float(value_str)

    return list(records.values())

In [7]:
EXPERIMENT = '100c_100r_nosample'
ALGORITHM = 'fedkrum'

# Input log and output workbook, both relative to the working directory
# (adjust EXPERIMENT / ALGORITHM above to point at a different run).
log_path = Path(EXPERIMENT) / f'{ALGORITHM}_log.txt'
output_path = Path(EXPERIMENT) / 'transformed' / f'{ALGORITHM}_{EXPERIMENT}.xlsx'

# Read the whole log file into memory
with open(log_path, 'r') as f:
    log_data = f.read()

# Split entries based on a line containing 'Starting Flower server,'
# (the marker line itself is consumed by the split).
log_entries = re.split(r'(?m)^.*Starting Flower server,.*\n?', log_data)

# Process all entries and combine data, tagging each row with the 1-based
# index of the entry it came from.
all_data = []
num_count = 1
for entry in log_entries:
    # Skip empty or whitespace-only chunks so they do not consume a number
    if not entry.strip():
        continue
    extracted_data = extract_data(entry)
    for item in extracted_data:
        item['exp_num'] = num_count
    all_data += extracted_data
    num_count += 1

# Fail with a clear message instead of an opaque KeyError further down
if not all_data:
    raise ValueError(f'No centralized loss/accuracy data found in {log_path}')

# Create a Pandas DataFrame with a fixed column order; reindex (rather than
# df[[...]]) fills a column with NaN when no entry provided that metric.
df = pd.DataFrame(all_data).reindex(columns=['exp_num', 'round', 'loss', 'accuracy'])

# Export to Excel, creating the output directory if it does not exist yet
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_excel(output_path, index=False)