In [2]:
from pathlib import Path

In [3]:
def parse_log_file(log_file_path):
    """Collect the best validation accuracy for each hyperparameter block in a log.

    The log is read line by line. A line starting with 'Hyperparameters:' opens
    a new block; every following line starting with 'Epoch' contributes the
    number found at the end of its last comma-separated field. All other lines
    are ignored.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        A list of ``(hyperparameter_line, best_accuracy)`` tuples in file order.
        ``best_accuracy`` is 0.0 for a block without any epoch value above 0.

    Raises:
        ValueError: If the trailing token of an 'Epoch' line is not a float.
    """
    summaries = []
    active_config = None
    top_score = 0.0

    with open(log_file_path, 'r') as handle:
        for raw_line in handle:
            entry = raw_line.strip()

            if entry.startswith('Hyperparameters:'):
                # Close the previous block (if one was open) before starting anew
                if active_config:
                    summaries.append((active_config, top_score))
                active_config = entry
                top_score = 0.0
                continue

            if not entry.startswith('Epoch'):
                # Neither a block header nor an epoch record
                continue

            # The accuracy is the last space-separated token of the last field,
            # e.g. ' Val Accuracy: 0.1333705373108387'
            last_field = entry.split(',')[-1].strip()
            score = float(last_field.split(' ')[-1])
            if score > top_score:
                top_score = score

    # The final block has no following header to trigger its flush
    if active_config:
        summaries.append((active_config, top_score))

    return summaries

In [23]:
# Parse the hyperparameter-search log and print one
# (hyperparameter line, best validation accuracy) tuple per configuration
log_file_path = '/vol/biomedic3/bglocker/ugproj2324/nns20/CheXagent/model_inspection/linear_probe_hyperparam_searches/VinDr_search'
results = parse_log_file(log_file_path)
for hyperparameter_line, best_val_accuracy in results:
    print((hyperparameter_line, best_val_accuracy))

('Hyperparameters: layer=post_layer_norm, batch_size=64, num_epochs=10, learning_rate=0.0001', 0.6920454621315002)
('Hyperparameters: layer=post_layer_norm, batch_size=64, num_epochs=10, learning_rate=0.001', 0.7113636374473572)
('Hyperparameters: layer=post_layer_norm, batch_size=64, num_epochs=10, learning_rate=0.01', 0.6923295497894287)
('Hyperparameters: layer=post_layer_norm, batch_size=64, num_epochs=20, learning_rate=0.0001', 0.693181824684143)
('Hyperparameters: layer=post_layer_norm, batch_size=64, num_epochs=20, learning_rate=0.001', 0.7122159123420715)
('Hyperparameters: layer=post_layer_norm, batch_size=64, num_epochs=20, learning_rate=0.01', 0.6892045497894287)
('Hyperparameters: layer=post_layer_norm, batch_size=64, num_epochs=40, learning_rate=0.0001', 0.7122159123420715)
('Hyperparameters: layer=post_layer_norm, batch_size=64, num_epochs=40, learning_rate=0.001', 0.7230113744735718)
('Hyperparameters: layer=post_layer_norm, batch_size=64, num_epochs=40, learning_rate=0.

In [26]:
# Write the results to a CSV file next to the log file (same path, '.csv' suffix).
# Relies on `log_file_path` and `results` from the cell that calls parse_log_file.
output_file_path = Path(log_file_path).with_suffix('.csv')
header = ["layer","batch_size","num_epochs","learning_rate","val_accuracy"]
with open(output_file_path, 'w') as file:
    # Emit the column names first; previously `header` was built but never
    # written, leaving the CSV without a header row
    file.write(",".join(header) + "\n")
    for result in results:
        # Each comma-separated piece of the hyperparameter line has the form
        # 'name=value'; keep only the value so it lines up with `header`
        hyperparameters = [val.split("=")[1] for val in result[0].split(',')]
        # One row per configuration: the hyperparameter values, then the accuracy
        file.write(",".join(hyperparameters + [str(result[1])]) + "\n")