In [1]:
import numpy as np
import re

In [21]:
def extract_info(block):
    """Parse one results block into hyperparameter values and mask accuracy.

    The first line of the block is treated as a header and is ignored when
    collecting ``key: value`` pairs. Pairs whose key is ``sequence`` or
    ``mask`` are dropped from the result (presumably these come from the
    "Accuracy for the ..." lines rather than from hyperparameters -- verify
    against the log format).

    Parameters
    ----------
    block : str
        Text of a single block from the results file.

    Returns
    -------
    tuple
        ``(hyperparameters, accuracy)`` where ``hyperparameters`` is a tuple
        of the raw string values in order of appearance and ``accuracy`` is
        the mask accuracy converted to ``float``.

    Raises
    ------
    ValueError
        If the block has no ``Accuracy for the mask: <value>`` entry, or if
        the captured value cannot be converted to ``float``.
    """
    block_lines = block.strip().split('\n')

    # Key/value pairs are searched only after the header line.
    body = '\n'.join(block_lines[1:])
    matches = re.findall(r"(\w+):\s+(\S+)", body)

    # The accuracy is searched in the whole block, header included.
    accuracy_match = re.search(r"Accuracy for the mask: (\S+)", block)
    if accuracy_match is None:
        # Fail with a message that identifies the block instead of the opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(
            "No 'Accuracy for the mask' entry found in block starting with: "
            + repr(block_lines[0])
        )

    hyperparameters = tuple(
        value for key, value in matches if key not in ('sequence', 'mask')
    )

    return hyperparameters, float(accuracy_match.group(1))

In [22]:
# Load the raw hyperparameter-search log into memory
file_path = './ml/BERT/hyperparameters_combinations'
with open(file_path) as log_file:
    content = log_file.read()

# Two consecutive newlines (a blank line) separate one block from the next
blocks = content.split("\n\n")

In [23]:
# Parse only the blocks that carry the hyperparameter header
parsed_blocks = [
    extract_info(candidate)
    for candidate in blocks
    if "Hyperparameters combinations:" in candidate
]

# Keep hyperparameters and accuracies in two parallel lists (same index = same block)
combinations = [params for params, _ in parsed_blocks]
accuracies = [accuracy for _, accuracy in parsed_blocks]

In [49]:
# Position of the highest accuracy; the two lists share the same indexing
best_index = np.argmax(accuracies)
best_combination = combinations[best_index]
print("Best combination:", best_combination, "with accuracy:", np.max(accuracies))

Best combination: ('32', '2', '32', '1', '0.01', '100') with accuracy: 0.4968499686666731


In [50]:
# Indices of the five highest accuracies, ordered from best to worst
best_5_combinations = np.argsort(accuracies)[::-1][:5]
print("5 best combinations:")
for rank_index in best_5_combinations:
    combo, score = combinations[rank_index], accuracies[rank_index]
    print(combo, "with accuracy:", score)

5 best combinations:
('32', '2', '32', '1', '0.01', '100') with accuracy: 0.4968499686666731
('32', '2', '32', '1', '0.01', '50') with accuracy: 0.4678021735578361
('32', '2', '128', '1', '0.01', '100') with accuracy: 0.4544552429435231
('32', '2', '128', '1', '0.01', '50') with accuracy: 0.43934982882139195
('32', '2', '256', '1', '0.01', '100') with accuracy: 0.4355149466200393


In [51]:
# argsort is ascending, so the first five indices are the lowest accuracies
ascending_order = np.argsort(accuracies)
worst_5_combinations = ascending_order[:5]
print("5 worst combinations:")
for position in worst_5_combinations:
    combo, score = combinations[position], accuracies[position]
    print(combo, "with accuracy:", score)

5 worst combinations:
('32', '4', '32', '1', '0.0001', '10') with accuracy: 0.09693371390482537
('32', '2', '32', '1', '0.0001', '10') with accuracy: 0.10512123894553611
('32', '8', '32', '1', '0.0001', '10') with accuracy: 0.10575870403953769
('32', '2', '128', '1', '0.0001', '10') with accuracy: 0.11945416106513644
('32', '8', '128', '1', '0.0001', '10') with accuracy: 0.12726312848075633
