In [23]:
import pandas as pd
import re

In [24]:
# Step 1: Read your data (assuming it's in a file named 'data.txt')
with open('logs.txt', 'r') as file:
    text_data = file.read()

# Step 2: Split the data into sections
sections = text_data.split('logs/experiment/fedkrum_log.txt:')
sections = sections[1:]

In [25]:
# Step 3: Extract data using regular expressions
loss_pattern = r'\(([\d]+), ([\d.]+)\)'
metric_pattern = r'\(([\d]+), ([\d.]+)\)'

data = []
for i in range(10):
    losses_section = sections[i]
    metrics_section = sections[i+1]

    losses = re.findall(loss_pattern, losses_section)
    metrics = re.findall(metric_pattern, metrics_section)

    df = pd.DataFrame(losses, columns=['round', 'loss'])
    df['round'] = df['round'].astype(int)
    df['loss'] = df['loss'].astype(float)

    metrics_df = pd.DataFrame(metrics, columns=['round', 'accuracy'])
    metrics_df['round'] = metrics_df['round'].astype(int)
    metrics_df['accuracy'] = metrics_df['accuracy'].astype(float)

    df['exp_num'] = i+1
    metrics_df['exp_num'] = i+1

    merged_df = pd.merge(df, metrics_df, on=['round', 'exp_num'])

    data.append(merged_df)

# Step 4: Concatenate the dataframes
df = pd.concat(data, ignore_index=True)

# Step 5: Now you have your data in a single dataframe!
print(df)

      round      loss  exp_num  accuracy
0         0  1.098612        1  0.289400
1         1  0.944096        1  0.570800
2         2  0.847279        1  0.596400
3         3  0.840964        1  0.594300
4         4  0.833092        1  0.601700
...     ...       ...      ...       ...
1005     96  0.606800       10  0.815866
1006     97  0.609700       10  0.814413
1007     98  0.607000       10  0.814664
1008     99  0.606400       10  0.814116
1009    100  0.610000       10  0.815246

[1010 rows x 4 columns]


In [26]:
df.to_excel('fedkrum_log.xlsx', columns=['exp_num', 'round', 'loss', 'accuracy'], index=False)