# In [2]:
import re
import csv

# Patterns are compiled once instead of being looked up again for every line.
# The epoch pattern is case-sensitive, so it does not match the capitalised
# "Epoch=" that appears inside the Training/Testing result lines.
_EPOCH_RE = re.compile(r'epoch=(\d+)')
_TRAIN_RE = re.compile(r'Training: Epoch=\d+ \| Loss: ([\d.]+) \|  Acc: ([\d.]+)%')
_TEST_RE = re.compile(r'Testing: Epoch=\d+ \| Loss: ([\d.]+) \|  Acc: ([\d.]+)%')


def _parse_stats_lines(lines, max_epoch):
    """Return the CSV rows found in the lines of a single stats log."""
    rows = []
    epoch = None
    train_metrics = None  # (loss, accuracy) of the most recent Training line

    for line in lines:
        match = _EPOCH_RE.search(line)
        if match:
            epoch = int(match.group(1))

        match = _TRAIN_RE.search(line)
        if match:
            train_metrics = match.groups()

        match = _TEST_RE.search(line)
        if match and epoch is not None and train_metrics is not None:
            train_loss, train_acc = train_metrics
            test_loss, test_acc = match.groups()
            rows.append([
                epoch,
                train_loss,
                f"tensor({train_acc})",
                test_loss,
                f"tensor({test_acc})",
            ])
            # Forget the training metrics once they are used so that a later
            # Testing line without its own Training line cannot be paired
            # with values left over from an earlier epoch.
            train_metrics = None

        # The cutoff is checked on every line, so parsing stops on the very
        # line that announces an epoch >= max_epoch; no row is produced for
        # that epoch.
        if max_epoch is not None and epoch is not None and epoch >= max_epoch:
            break

    return rows


def convert_txt_to_csv(input_files, output_file, max_epoch=150):
    """Collect per-epoch metrics from text logs and write them to one CSV.

    Each input file is scanned line by line for three kinds of lines:

    * any line containing ``epoch=<digits>`` sets the current epoch,
    * a ``Training: Epoch=... | Loss: ... |  Acc: ...%`` line supplies the
      training loss and accuracy,
    * a ``Testing: Epoch=... | Loss: ... |  Acc: ...%`` line supplies the test
      loss and accuracy and completes a row.

    A row is written only when an epoch and a Training line have been seen
    since the previous row. Loss and accuracy values are copied as the text
    captured from the log; accuracies are wrapped as ``tensor(<value>)``.
    Rows from all input files are concatenated in the order the files are
    given.

    Args:
        input_files: Iterable of paths to the text logs to read.
        output_file: Path of the CSV file to create (overwritten if present).
        max_epoch: Parsing of a file stops as soon as the current epoch is
            greater than or equal to this value. ``None`` disables the
            cutoff. Defaults to 150.

    Raises:
        OSError: If an input file cannot be opened or the output file cannot
            be written.
    """
    header = ['Epoch', 'Train Loss', 'Train Accuracy', 'Test Loss', 'Test Accuracy']
    rows = []

    for file_name in input_files:
        # Stream the file so that an early cutoff does not require loading
        # the remainder of the log.
        with open(file_name, 'r') as file:
            rows.extend(_parse_stats_lines(file, max_epoch))

    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(rows)

if __name__ == "__main__":
    # Destination CSV that receives the rows gathered from every log below.
    # NOTE(review): the name says "1REP" while the inputs are the 10REP and
    # 15REP logs -- confirm this is the intended output file.
    output_file = 'training_metrics_HARPredNetBpD_15CLS_FalseNes_0.001WD_FalseTIED_1REP.csv'

    # Text logs to convert; their rows are written in this order.
    input_files = [
        'training_stats_HARPredNetBpD_15CLS_FalseNes_0.001WD_FalseTIED_10REP.txt',
        'training_stats_HARPredNetBpD_15CLS_FalseNes_0.001WD_FalseTIED_15REP.txt',
    ]

    convert_txt_to_csv(input_files=input_files, output_file=output_file)
