# Notebook: Format Hyperparameter Tuning Results


## Packages


In [1]:
import pandas as pd
import numpy as np

## Settings


In [2]:
# Tasks whose tuning results are formatted, in the order they appear in the table.
TASKS = ["acd", "acsa"]
# LaTeX label per task: the upper-cased task name wrapped in \textbf{...}.
TASK_ENCODING = {task: "\\textbf{" + task.upper() + "}" for task in TASKS}
# Sample sizes used in the result file names; one table row is printed per size.
SAMPLE_SIZES = [500, 1_000, 1_500, 2_000]

## Code


In [3]:
def round_number(num, decimal_places):
    """Format ``num`` as a fixed-point string with ``decimal_places`` decimals.

    Args:
        num: Number to format (any value that accepts the ``f`` format spec).
        decimal_places: Number of digits to keep after the decimal point.

    Returns:
        str: The formatted number, zero-padded to ``decimal_places`` digits,
        e.g. ``round_number(0.9, 3) == "0.900"``.
    """
    # One fixed-point format already rounds and pads; parsing the string back
    # to float and formatting it again with the same spec is redundant.
    return f"{num:.{decimal_places}f}"


def add_thousand_dots(n_sample):
    """Insert thousands separators into a number or numeric string.

    Despite the function name, the separator is a comma (``1000`` -> ``1,000``),
    which is what ``"{:,}"`` produces.

    Args:
        n_sample: One of
            * a numeric string, optionally with a single ``.``; only the
              integer part is grouped, the decimal part is kept verbatim;
            * an integer (Python ``int`` or NumPy integer);
            * a float (Python ``float`` or ``np.float64``), which is rounded
              to one decimal place before grouping.
            Any other value (including ``bool`` and ``None``) is returned
            unchanged.

    Returns:
        str for the supported inputs above; otherwise ``n_sample`` itself.

    Raises:
        ValueError: If a string contains more than one ``.`` or its integer
            part cannot be parsed by ``int``.
    """
    if isinstance(n_sample, str):
        if '.' in n_sample:
            integer_part, decimal_part = n_sample.split('.')
            formatted_integer_part = "{:,}".format(int(integer_part))
            # int("-0") == 0 drops the sign, so restore it for values in (-1, 0).
            if integer_part.strip().startswith('-') and not formatted_integer_part.startswith('-'):
                formatted_integer_part = "-" + formatted_integer_part
            return f"{formatted_integer_part}.{decimal_part}"
        return "{:,}".format(int(n_sample))

    # bool is a subclass of int; treat it as "not a number" and pass it through.
    if isinstance(n_sample, bool):
        return n_sample

    if isinstance(n_sample, (int, np.integer)):
        return "{:,}".format(int(n_sample))

    # np.float64 subclasses float, so this covers both.
    if isinstance(n_sample, float):
        return "{:,}".format(round(n_sample, 1))

    return n_sample

In [4]:
# Print one LaTeX table row per (task, sample size): for each results file,
# pick the trial with the highest f1_micro and report its hyperparameters
# together with its f1_micro and accuracy.
n_sample_sizes = len(SAMPLE_SIZES)
reported_columns = [
    'learning_rate', 'num_train_epochs', 'per_device_train_batch_size', 'f1_micro', 'accuracy']

for task in TASKS:
    for sample_size_idx, sample_size in enumerate(SAMPLE_SIZES):
        tsv_file_path = f"optuna_20_gbert_{task}_{sample_size}.tsv"
        df = pd.read_csv(tsv_file_path, sep='\t')
        max_f1_micro_index = df['f1_micro'].idxmax()
        values_at_max_f1_micro = df.loc[max_f1_micro_index, reported_columns]

        if sample_size_idx == 0:
            # The task label spans all rows of its group. The backslash is
            # escaped explicitly: "\m" is an invalid escape sequence.
            task_print = "\\multirow{" + str(n_sample_sizes) + "}{*}{" + TASK_ENCODING[task] + "}"
        else:
            task_print = " "

        cells = [
            task_print,
            add_thousand_dots(str(sample_size)),
            str(values_at_max_f1_micro["num_train_epochs"]),
            str(values_at_max_f1_micro["learning_rate"]),
            str(values_at_max_f1_micro["per_device_train_batch_size"]),
            round_number(values_at_max_f1_micro["f1_micro"], 3),
            round_number(values_at_max_f1_micro["accuracy"], 3),
        ]
        print(" & ".join(cells), "\\\\")

        # Close each task's group of rows with a horizontal rule.
        if sample_size_idx == n_sample_sizes - 1:
            print("\\hline")

\multirow{4}{*}{\textbf{ACD}} & 500 & 13 & 3e-05 & 8 & 0.889 & 0.798 \\
  & 1,000 & 14 & 2e-05 & 8 & 0.898 & 0.822 \\
  & 1,500 & 10 & 3e-05 & 16 & 0.901 & 0.824 \\
  & 2,000 & 7 & 3e-05 & 32 & 0.901 & 0.828 \\
\hline
\multirow{4}{*}{\textbf{ACSA}} & 500 & 15 & 2e-05 & 16 & 0.755 & 0.621 \\
  & 1,000 & 15 & 2e-05 & 8 & 0.798 & 0.696 \\
  & 1,500 & 20 & 2e-05 & 16 & 0.811 & 0.704 \\
  & 2,000 & 18 & 3e-05 & 32 & 0.808 & 0.706 \\
\hline
