In [2]:
import os
import time 
import datetime

def print_table(data: dict[str, list], precision: int = 4):
    """Print a column-oriented table as aligned plain text.

    Args:
        data: Mapping of column name to the list of cell values. The number of
            rows is taken from the first column. An ``"index"`` column is always
            shown first; when it is missing, or its length differs from the
            row count, a 1-based index is generated for display only.
            ``data`` itself is never modified.
        precision: Number of decimals used for non-integer numeric columns.

    Formatting rules:
        * a column containing at least one ``str`` is rendered with ``str()``
          and left-aligned;
        * a column made only of integers is printed without decimals and
          right-aligned;
        * every other column is printed as fixed-point with ``precision``
          decimals and right-aligned.
    """
    if not data:
        print("(empty)")
        return

    n_rows = len(next(iter(data.values())))

    # Work on a shallow copy so the generated index never leaks into the
    # caller's dict (it would otherwise show up as a real data column later).
    columns = dict(data)
    if "index" not in columns or len(columns["index"]) != n_rows:
        columns["index"] = list(range(1, n_rows + 1))
    headers = ["index"] + [h for h in columns if h != "index"]

    # Numeric format for non-integer columns
    fmt = f"{{:.{precision}f}}"

    # If any value is a string, treat the entire column as strings
    is_string_col = {
        h: any(isinstance(v, str) for v in columns[h])
        for h in headers
    }
    # Pure integer columns (bool excluded) are shown as integers, not "1.0000"
    is_int_col = {
        h: all(isinstance(v, int) and not isinstance(v, bool) for v in columns[h])
        for h in headers
    }

    def render(header: str, value) -> str:
        if is_string_col[header] or is_int_col[header]:
            return str(value)
        return fmt.format(value)

    # Pre-render every cell once; widths and rows both use these strings
    cells = {h: [render(h, v) for v in columns[h]] for h in headers}

    # default=0 keeps a table with zero rows printable (header + separator only)
    col_widths = [
        max(len(h), max((len(c) for c in cells[h]), default=0))
        for h in headers
    ]

    # Header row (strings left-aligned, numbers right-aligned)
    header_row = " | ".join(
        f"{h:<{w}}" if is_string_col[h] else f"{h:>{w}}"
        for h, w in zip(headers, col_widths)
    )
    separator = "-+-".join("-" * w for w in col_widths)

    print(header_row)
    print(separator)
    for i in range(n_rows):
        row_cells = [
            cells[h][i].ljust(w) if is_string_col[h] else cells[h][i].rjust(w)
            for h, w in zip(headers, col_widths)
        ]
        print(" | ".join(row_cells))

In [3]:
def dt_to_intkey(date_str: str, time_str: str) -> float:
    """Turn a date/time pair into a numeric key suitable for sorting.

    Args:
        date_str: Date formatted as ``%Y_%m_%d`` (e.g. ``"2025_10_30"``).
        time_str: Time formatted as ``%H_%M_%S`` (e.g. ``"00_00_00"``).

    Returns:
        The POSIX timestamp of the parsed moment. The parsed datetime is
        naive, so ``timestamp()`` interprets it in the local time zone.

    Raises:
        ValueError: If the combined string does not match the expected format.
    """
    stamp = "{} {}".format(date_str, time_str)
    parsed = datetime.datetime.strptime(stamp, "%Y_%m_%d %H_%M_%S")
    return parsed.timestamp()

In [6]:
data_dir = "../../logs/full_runs/2026_01_23_YCoCg_one_by_one_Kodak/results/"

# Collect the log files, sorted by name.
res_files_unfiltered = sorted(f for f in os.listdir(data_dir) if f.endswith(".log"))
if not res_files_unfiltered:
    # Previously surfaced as "not enough values to unpack" from the zip() below.
    raise ValueError(f"No .log files found in {data_dir!r}.")

# File names are expected to look like "<YYYY_MM_DD>__<HH_MM_SS>__<rest>.log";
# the first two "__"-separated fields are the run's date and time.
badly_named = [f for f in res_files_unfiltered if "__" not in f]
if badly_named:
    raise ValueError(
        f"Log file names must start with '<date>__<time>': {badly_named}"
    )

dates, times = zip(*(f.split("__")[:2] for f in res_files_unfiltered))
creation_times = {
    # position in res_files_unfiltered -> timestamp parsed from the file name
    index: dt_to_intkey(date_part, time_part)
    for index, (date_part, time_part) in enumerate(zip(dates, times))
}

# Keep only runs whose file-name timestamp is at or after this cutoff.
only_after = dt_to_intkey("2025_10_30", "00_00_00")
res_files = [
    res_files_unfiltered[index]
    for index, creation_time in creation_times.items()
    if creation_time >= only_after
]
print(len(res_files), "files found after filtering.")

32 files found after filtering.


In [7]:
# Example log lines we are looking for:
# Using color space YCoCg with bitdepths [8, 9, 9]
# Using image ARM: True
# Using encoder gain: 64
# Using multi-region image ARM: True
# Using color regression: False
# Total training iterations: 142200
# Total MAC per pixel: 1694.3125
# Final results after quantization: Loss: 3.278064727783203, Rate NN: 0.11986033121744792, Rate Latent: 0.1637668013572693, Rate Img: 2.9944374561309814
# Rate Img bistream: 7.751302083333333

# Run settings, pre-filled with the fallback that stays in effect when the
# matching line is absent from the log.
using_color_space = ""
using_image_arm = False
using_encoder_gain = 0
using_multi_region_image_arm = False
using_color_regression = False
total_training_iterations = 0
total_mac_per_pixel = 0.0
# Accumulators, created empty here.
final_results = []
rate_img_bistream = []

# The settings are read from the first log file only.
with open(os.path.join(data_dir, res_files[0]), "r") as infile:
    lines = infile.readlines()

# The prefixes are mutually exclusive, so at most one branch fires per line;
# when a prefix occurs on several lines, the last occurrence wins.
for line in lines:
    if line.startswith("Using color space"):
        using_color_space = (
            line.removeprefix("Using color space").strip().partition(" with bitdepths")[0]
        )
    elif line.startswith("Using image ARM:"):
        using_image_arm = line.removeprefix("Using image ARM:").strip() == "True"
    elif line.startswith("Using encoder gain:"):
        using_encoder_gain = int(line.removeprefix("Using encoder gain:").strip())
    elif line.startswith("Using multi-region image ARM:"):
        using_multi_region_image_arm = (
            line.removeprefix("Using multi-region image ARM:").strip() == "True"
        )
    elif line.startswith("Using color regression:"):
        using_color_regression = line.removeprefix("Using color regression:").strip() == "True"
    elif line.startswith("Total training iterations:"):
        total_training_iterations = int(line.removeprefix("Total training iterations:").strip())
    elif line.startswith("Total MAC per pixel:"):
        total_mac_per_pixel = float(line.removeprefix("Total MAC per pixel:").strip())

# A log without a color-space line is treated as unusable.
if not using_color_space:
    print(f"Could not determine color space for file {res_files[0]}, skipping.")
    raise ValueError("No color space found.")

print("Using color space:", using_color_space)
print("Using Image ARM:", using_image_arm)
print("Using encoder gain:", using_encoder_gain)
print("Using multi-region image ARM:", using_multi_region_image_arm)
print("Using color regression:", using_color_regression)
print("Total training iterations:", total_training_iterations)
print("Total MAC per pixel:", total_mac_per_pixel)


rate_prefix = "Rate Img bistream:"
best_loss_marker = "Best loss for module upsampling:"

# Gather, for every log file, the bitstream rate(s) and the "best loss" record(s).
for f in res_files:
    with open(os.path.join(data_dir, f), "r") as infile:
        lines = infile.readlines()

    n_rates_before = len(rate_img_bistream)
    n_results_before = len(final_results)

    for line in lines:
        if line.startswith(rate_prefix):
            rate_img_bistream.append(float(line[len(rate_prefix):].strip()))
        if best_loss_marker in line:
            # The marker is detected anywhere in the line, so take the text
            # that follows it rather than slicing from column 0.
            payload = line.split(best_loss_marker, 1)[1].strip()
            # Image name: first 7 characters of the last "_"-separated token
            # of the file name (e.g. "kodim01").
            result = {"Im_name": f.split("_")[-1][:7]}
            for part in payload.split(", "):
                key, value = part.split(": ")
                result[key] = float(value)
            final_results.append(result)

    # The two lists are combined by position, so each file must contribute
    # the same number of entries to both.
    n_rates = len(rate_img_bistream) - n_rates_before
    n_results = len(final_results) - n_results_before
    if n_rates != n_results:
        print(
            f"Warning: {f} has {n_rates} '{rate_prefix}' line(s) but "
            f"{n_results} '{best_loss_marker}' line(s); rows may be misaligned."
        )

# print("Rate Img bistreams:", rate_img_bistream)
# print("Final results collected:", final_results)

def list_dict_to_dict_list(
    lst: list[dict[str, float]],
) -> dict[str, list[float]]:
    """Transpose a list of row dicts into a dict of column lists.

    The column set and its order come from the first row only. A row that
    lacks one of those keys contributes ``0.0`` for it; keys that appear only
    in later rows are ignored. An empty input yields an empty dict.
    """
    if len(lst) == 0:
        return {}
    columns = lst[0].keys()
    return {name: [row.get(name, 0.0) for row in lst] for name in columns}

# Column-oriented view of the per-file results.
results_table = list_dict_to_dict_list(final_results)

# The bitstream rates are attached by position, so there must be exactly one
# value per result row; otherwise rows would be silently misaligned or a later
# lookup would fail with an IndexError.
if len(rate_img_bistream) != len(final_results):
    raise ValueError(
        f"Got {len(rate_img_bistream)} 'Rate Img bistream' values for "
        f"{len(final_results)} result rows; cannot attach them as a column."
    )
results_table["Rate Img bistream"] = rate_img_bistream

# I have an issue where some entries are duplicated; duplicates can be told apart by their `Im_name` entry.
# I.e. I have to sort the table by `Im_name` and then discard the rows with the worse `Loss`.
def sort_by_im_name(table: dict[str, list]) -> dict[str, list]:
    """Return a copy of ``table`` with its rows ordered by the ``"Im_name"`` column.

    The row count is taken from the first column. The sort is stable, so rows
    sharing a name keep their relative order. The input is not modified.
    """
    row_count = len(next(iter(table.values())))
    names = table["Im_name"]
    order = sorted(range(row_count), key=lambda row: names[row])
    return {column: [cells[row] for row in order] for column, cells in table.items()}
# Reorder every column so that the rows are sorted by image name.
results_table = sort_by_im_name(results_table)

# Now discard duplicates
def discard_duplicates(table: dict[str, list]) -> dict[str, list]:
    """Keep a single row per ``"Im_name"``: the one with the smallest ``"Loss"``.

    The surviving row takes the position at which its name first appeared, so
    the order of names in the output follows their first occurrence in the
    input. On equal losses the earliest row wins. The input is not modified.
    """
    row_count = len(next(iter(table.values())))
    names = table["Im_name"]

    # name -> (best loss so far, row index). Re-assigning an existing key keeps
    # its original insertion position, which preserves first-occurrence order.
    best_per_name = {}
    for row in range(row_count):
        name = names[row]
        loss = table["Loss"][row]
        incumbent = best_per_name.get(name)
        if incumbent is None or loss < incumbent[0]:
            best_per_name[name] = (loss, row)

    kept_rows = [row for _, row in best_per_name.values()]
    return {column: [cells[row] for row in kept_rows] for column, cells in table.items()}
# Keep only the lowest-loss row for each image name.
results_table = discard_duplicates(results_table)
print("Entries after discarding duplicates:", len(results_table["Im_name"]))
# Show the per-image results as an aligned text table.
print_table(results_table)

# Print the mean of every numeric column of the results table.
for key in results_table.keys():
    column_values = results_table[key]
    # Row numbers are not a measurement; averaging them is meaningless.
    if key == "index" or not column_values:
        continue
    # Skip columns holding anything non-numeric (e.g. image names) instead of
    # reporting a misleading average of 0.
    if not all(
        isinstance(v, (int, float)) and not isinstance(v, bool)
        for v in column_values
    ):
        continue
    avg = sum(column_values) / len(column_values)
    print(f"{key}: {avg:.4f}")

Using color space: YCoCg
Using Image ARM: True
Using encoder gain: 64
Using multi-region image ARM: False
Using color regression: False
Total training iterations: 142200
Total MAC per pixel: 1694.3125
Entries after discarding duplicates: 24
  index | Im_name |   Loss | Rate NN | Rate Latent | Rate Img | Rate Img bistream
--------+---------+--------+---------+-------------+----------+------------------
 1.0000 | kodim01 | 3.3551 |  0.0129 |      0.0460 |   3.2963 |            9.1527
 2.0000 | kodim02 | 2.7868 |  0.0142 |      0.2176 |   2.5550 |           10.6002
 3.0000 | kodim03 | 2.5035 |  0.0153 |      0.0981 |   2.3901 |            9.5091
 4.0000 | kodim04 | 2.8906 |  0.0135 |      0.1698 |   2.7073 |            8.0480
 5.0000 | kodim05 | 3.4435 |  0.0158 |      0.2645 |   3.1633 |            9.8744
 6.0000 | kodim06 | 2.9868 |  0.0140 |      0.2381 |   2.7346 |            8.2617
 7.0000 | kodim07 | 2.6495 |  0.0156 |      0.3799 |   2.2540 |           10.6006
 8.0000 | kodim08 | 3