In [1]:
import re
import pprint
import os

In [2]:
from torch.utils.tensorboard import SummaryWriter

In [3]:
def parse_log_accuracies(filepath: str) -> dict:
    """
    Parses a log file to extract batch accuracies for each epoch and step.

    Only lines of the form
    "[epoch: E/N, step: S/M ... batch_acc: VALUE" contribute; every other
    line is ignored. When the same (epoch, step) pair appears more than once,
    the last occurrence wins.

    Args:
        filepath: The path to the .log file.

    Returns:
        A nested dictionary where the outer keys are epoch numbers and the
        inner keys are step numbers, with batch accuracy as the value.
        Example: {1: {5: 0.9739, 10: 0.9783}, 2: {...}}
        An empty dictionary is returned (after printing an error message)
        when the file does not exist.
    """
    # Full float literal: optional sign, digits with an optional fractional
    # part, optional exponent. A bare digits-and-dots class would cut
    # "9.5e-01" down to "9.5", skip signed values, and let float() raise on a
    # value followed by a sentence-ending period ("0.9783.").
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    # Matches lines like: "[epoch: 1/20, step: 5/46 ... batch_acc: 0.9739 ..."
    log_pattern = re.compile(
        r"\[epoch: (\d+)/\d+, step: (\d+)/\d+ .*?batch_acc: (" + number + r")"
    )

    accuracies = {}

    try:
        # Explicit encoding keeps parsing independent of the machine's locale;
        # undecodable bytes are replaced instead of aborting the whole parse.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                match = log_pattern.search(line)
                if match is None:
                    continue

                epoch = int(match.group(1))
                step = int(match.group(2))

                # Create the per-epoch dictionary on first use.
                accuracies.setdefault(epoch, {})[step] = float(match.group(3))
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return {}

    return accuracies

In [4]:
def parse_log_aux_losses(filepath: str) -> dict:
    """
    Parses a log file to extract batch_aux_loss for each epoch and step.

    Only lines of the form
    "[epoch: E/N, step: S/M ... batch_aux_loss: VALUE" contribute; every
    other line is ignored. When the same (epoch, step) pair appears more than
    once, the last occurrence wins.

    Args:
        filepath: The path to the .log file.

    Returns:
        A nested dictionary of the form: {epoch: {step: aux_loss}}
        An empty dictionary is returned (after printing an error message)
        when the file does not exist.
    """
    # Full float literal: optional sign, digits with an optional fractional
    # part, optional exponent. A bare digits-and-dots class would cut
    # "1.5e-05" down to "1.5", skip negative values, and let float() raise on
    # a value followed by a sentence-ending period ("0.03.").
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    log_pattern = re.compile(
        r"\[epoch: (\d+)/\d+, step: (\d+)/\d+ .*?batch_aux_loss: (" + number + r")"
    )

    aux_losses = {}

    try:
        # Explicit encoding keeps parsing independent of the machine's locale;
        # undecodable bytes are replaced instead of aborting the whole parse.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                match = log_pattern.search(line)
                if match is None:
                    continue

                epoch = int(match.group(1))
                step = int(match.group(2))

                # Create the per-epoch dictionary on first use.
                aux_losses.setdefault(epoch, {})[step] = float(match.group(3))
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return {}

    return aux_losses


In [5]:
def parse_log_batch_losses(filepath: str) -> dict:
    """
    Parses a log file to extract batch_loss for each epoch and step.

    Only lines of the form
    "[epoch: E/N, step: S/M ... batch_loss: VALUE" contribute; every other
    line is ignored. When the same (epoch, step) pair appears more than once,
    the last occurrence wins.

    Args:
        filepath: The path to the .log file.

    Returns:
        A nested dictionary of the form: {epoch: {step: batch_loss}}
        An empty dictionary is returned (after printing an error message)
        when the file does not exist.
    """
    # Full float literal: optional sign, digits with an optional fractional
    # part, optional exponent. A bare digits-and-dots class would cut
    # "2.5e-04" down to "2.5", skip negative values, and let float() raise on
    # a value followed by a sentence-ending period ("0.21.").
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    # The literal "batch_loss: " is not a substring of "batch_aux_loss: ", so
    # the auxiliary loss on the same line is never picked up by mistake.
    log_pattern = re.compile(
        r"\[epoch: (\d+)/\d+, step: (\d+)/\d+ .*?batch_loss: (" + number + r")"
    )

    batch_losses = {}

    try:
        # Explicit encoding keeps parsing independent of the machine's locale;
        # undecodable bytes are replaced instead of aborting the whole parse.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                match = log_pattern.search(line)
                if match is None:
                    continue

                epoch = int(match.group(1))
                step = int(match.group(2))

                # Create the per-epoch dictionary on first use.
                batch_losses.setdefault(epoch, {})[step] = float(match.group(3))
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return {}

    return batch_losses


In [6]:
def parse_log_mIoU(filepath: str) -> dict:
    """
    Parses a log file to extract batch_mIoU for each epoch and step.

    Only lines of the form
    "[epoch: E/N, step: S/M ... batch_mIoU: VALUE" contribute; every other
    line is ignored. When the same (epoch, step) pair appears more than once,
    the last occurrence wins.

    Args:
        filepath: The path to the .log file.

    Returns:
        A nested dictionary of the form: {epoch: {step: mIoU}}
        An empty dictionary is returned (after printing an error message)
        when the file does not exist.
    """
    # Full float literal: optional sign, digits with an optional fractional
    # part, optional exponent. A bare digits-and-dots class would cut
    # "6.1e-01" down to "6.1", skip signed values, and let float() raise on a
    # value followed by a sentence-ending period ("0.61.").
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    log_pattern = re.compile(
        r"\[epoch: (\d+)/\d+, step: (\d+)/\d+ .*?batch_mIoU: (" + number + r")"
    )

    miou_scores = {}

    try:
        # Explicit encoding keeps parsing independent of the machine's locale;
        # undecodable bytes are replaced instead of aborting the whole parse.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                match = log_pattern.search(line)
                if match is None:
                    continue

                epoch = int(match.group(1))
                step = int(match.group(2))

                # Create the per-epoch dictionary on first use.
                miou_scores.setdefault(epoch, {})[step] = float(match.group(3))
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return {}

    return miou_scores

In [7]:
def parse_log_val_accuracies(filepath: str) -> dict:
    """
    Parses a log file to extract validation accuracies for each epoch.

    It handles two cases:
    1. The initial validation before training starts, assigned to epoch 0.
    2. End-of-epoch validations, assigned to their corresponding epoch number.

    When several lines map to the same epoch, the last one wins.

    Args:
        filepath: The path to the .log file.

    Returns:
        A dictionary mapping epoch number to validation accuracy.
        Example: {0: 0.8989, 1: 0.9150, 2: 0.9230}
        An empty dictionary is returned (after printing an error message)
        when the file does not exist.
    """
    # Full float literal: optional sign, digits with an optional fractional
    # part, optional exponent. A bare digits-and-dots class would cut
    # "9.23e-01" down to "9.23", skip signed values, and let float() raise on
    # a value followed by a sentence-ending period ("0.915.").
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    # Pattern for the initial validation line (before epoch 1)
    initial_val_pattern = re.compile(
        r"\[Before any weight update, VALIDATION\].*?val_acc: (" + number + r")"
    )

    # Pattern for end-of-epoch validation lines (assumes a similar format)
    epoch_val_pattern = re.compile(
        r"\[epoch: (\d+)/\d+.*?VALIDATION\].*?val_acc: (" + number + r")"
    )

    val_accuracies = {}

    try:
        # Explicit encoding keeps parsing independent of the machine's locale;
        # undecodable bytes are replaced instead of aborting the whole parse.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                # The pre-training validation line takes precedence and is
                # stored under epoch 0.
                initial_match = initial_val_pattern.search(line)
                if initial_match:
                    val_accuracies[0] = float(initial_match.group(1))
                    continue

                # Otherwise look for a regular end-of-epoch validation line.
                epoch_match = epoch_val_pattern.search(line)
                if epoch_match:
                    epoch = int(epoch_match.group(1))
                    val_accuracies[epoch] = float(epoch_match.group(2))

    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return {}

    return val_accuracies

In [8]:
def parse_log_val_losses(filepath: str) -> dict:
    """
    Parses a log file to extract validation losses for each epoch.

    It handles two cases:
    1. The initial validation before training starts, assigned to epoch 0.
    2. End-of-epoch validations, assigned to their corresponding epoch number.

    When several lines map to the same epoch, the last one wins.

    Args:
        filepath: The path to the .log file.

    Returns:
        A dictionary mapping epoch number to validation loss.
        Example: {0: 0.3772, 1: 0.2510, 2: 0.2300}
        An empty dictionary is returned (after printing an error message)
        when the file does not exist.
    """
    # Full float literal: optional sign, digits with an optional fractional
    # part, optional exponent. A bare digits-and-dots class would cut
    # "2.3e-01" down to "2.3", skip negative values, and let float() raise on
    # a value followed by a sentence-ending period ("0.251.").
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    # Pattern for the initial validation line (before epoch 1)
    initial_val_pattern = re.compile(
        r"\[Before any weight update, VALIDATION\].*?val_loss: (" + number + r")"
    )

    # Pattern for end-of-epoch validation lines
    epoch_val_pattern = re.compile(
        r"\[epoch: (\d+)/\d+.*?VALIDATION\].*?val_loss: (" + number + r")"
    )

    val_losses = {}

    try:
        # Explicit encoding keeps parsing independent of the machine's locale;
        # undecodable bytes are replaced instead of aborting the whole parse.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                # The pre-training validation line takes precedence and is
                # stored under epoch 0.
                initial_match = initial_val_pattern.search(line)
                if initial_match:
                    val_losses[0] = float(initial_match.group(1))
                    continue

                # Otherwise look for a regular end-of-epoch validation line.
                epoch_match = epoch_val_pattern.search(line)
                if epoch_match:
                    epoch = int(epoch_match.group(1))
                    val_losses[epoch] = float(epoch_match.group(2))

    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return {}

    return val_losses

In [9]:
def parse_log_val_mIoU(filepath: str) -> dict:
    """
    Parses a log file to extract validation mIoU scores for each epoch.

    It handles two cases:
    1. The initial validation before training starts, assigned to epoch 0.
    2. End-of-epoch validations, assigned to their corresponding epoch number.

    When several lines map to the same epoch, the last one wins.

    Args:
        filepath: The path to the .log file.

    Returns:
        A dictionary mapping epoch number to validation mIoU.
        Example: {0: 0.6055, 1: 0.6870, 2: 0.7120}
        An empty dictionary is returned (after printing an error message)
        when the file does not exist.
    """
    # Full float literal: optional sign, digits with an optional fractional
    # part, optional exponent. A bare digits-and-dots class would cut
    # "6.87e-01" down to "6.87", skip signed values, and let float() raise on
    # a value followed by a sentence-ending period ("0.7120.").
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    # Pattern for the initial validation line (before epoch 1)
    initial_val_pattern = re.compile(
        r"\[Before any weight update, VALIDATION\].*?val_mIoU: (" + number + r")"
    )

    # Pattern for end-of-epoch validation lines
    epoch_val_pattern = re.compile(
        r"\[epoch: (\d+)/\d+.*?VALIDATION\].*?val_mIoU: (" + number + r")"
    )

    val_miou_scores = {}

    try:
        # Explicit encoding keeps parsing independent of the machine's locale;
        # undecodable bytes are replaced instead of aborting the whole parse.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                # The pre-training validation line takes precedence and is
                # stored under epoch 0.
                initial_match = initial_val_pattern.search(line)
                if initial_match:
                    val_miou_scores[0] = float(initial_match.group(1))
                    continue

                # Otherwise look for a regular end-of-epoch validation line.
                epoch_match = epoch_val_pattern.search(line)
                if epoch_match:
                    epoch = int(epoch_match.group(1))
                    val_miou_scores[epoch] = float(epoch_match.group(2))

    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
        return {}

    return val_miou_scores

In [10]:
# All four per-step training metrics are read from the same log file.
log_path = '/home/olivieri/exp/logs/seg/with_text/synth_contr/phase_3_vlm_text_from-e5_alpha0.1_250903_2007.log'

train_accs = parse_log_accuracies(log_path)
train_aux_losses = parse_log_aux_losses(log_path)
train_losses = parse_log_batch_losses(log_path)
train_mIoU = parse_log_mIoU(log_path)

In [11]:
# Root directory under which the TensorBoard run directory is created.
tb_logs_dir_path = '/home/olivieri/exp/logs_tb/seg/with_text'
# Name of the run sub-directory; it reuses the parsed log file's name,
# '.log' suffix included.
exp_name = 'phase_3_vlm_text_from-e5_alpha0.1_250903_2007.log'

In [12]:
# Build the run directory path and make sure it exists before opening the writer.
tb_exp_dir = os.path.join(tb_logs_dir_path, exp_name)
os.makedirs(tb_exp_dir, exist_ok=True)
# Writer through which the following cells log every scalar.
tb_writer = SummaryWriter(log_dir=tb_exp_dir)

In [13]:
# Replay the per-step training accuracy into TensorBoard.
# The x-axis is a global step built from the 1-based epoch and the in-epoch
# step, with 46 steps per epoch (presumably the "/46" total printed in the
# log's "step: S/46" field -- confirm before reusing with another log).
for epoch, acc_by_step in train_accs.items():
    epoch_offset = (epoch - 1) * 46
    for step, acc in acc_by_step.items():
        tb_writer.add_scalar("train/acc", acc, epoch_offset + step)

In [14]:
# Replay the per-step auxiliary training loss into TensorBoard.
# The x-axis is a global step built from the 1-based epoch and the in-epoch
# step, with 46 steps per epoch (presumably the "/46" total printed in the
# log's "step: S/46" field -- confirm before reusing with another log).
for epoch, aux_loss_by_step in train_aux_losses.items():
    epoch_offset = (epoch - 1) * 46
    for step, aux_loss in aux_loss_by_step.items():
        tb_writer.add_scalar("train/aux_loss", aux_loss, epoch_offset + step)

In [15]:
# Replay the per-step training loss into TensorBoard.
# The x-axis is a global step built from the 1-based epoch and the in-epoch
# step, with 46 steps per epoch (presumably the "/46" total printed in the
# log's "step: S/46" field -- confirm before reusing with another log).
for epoch, loss_by_step in train_losses.items():
    epoch_offset = (epoch - 1) * 46
    for step, loss in loss_by_step.items():
        tb_writer.add_scalar("train/loss", loss, epoch_offset + step)

In [16]:
# Replay the per-step training mIoU into TensorBoard.
# The x-axis is a global step built from the 1-based epoch and the in-epoch
# step, with 46 steps per epoch (presumably the "/46" total printed in the
# log's "step: S/46" field -- confirm before reusing with another log).
for epoch, miou_by_step in train_mIoU.items():
    epoch_offset = (epoch - 1) * 46
    for step, miou in miou_by_step.items():
        tb_writer.add_scalar("train/mIoU", miou, epoch_offset + step)

In [17]:
# The per-epoch validation metrics are read from the same log file as the
# training metrics.
log_path = '/home/olivieri/exp/logs/seg/with_text/synth_contr/phase_3_vlm_text_from-e5_alpha0.1_250903_2007.log'

val_accs = parse_log_val_accuracies(log_path)
val_losses = parse_log_val_losses(log_path)
val_mIoU = parse_log_val_mIoU(log_path)

In [18]:
# Validation accuracy has one value per epoch; the epoch number itself is
# used as the TensorBoard step.
for epoch, val_acc in val_accs.items():
    tb_writer.add_scalar("val/acc", val_acc, epoch)

In [19]:
# Validation loss has one value per epoch; the epoch number itself is used
# as the TensorBoard step.
for epoch, val_loss in val_losses.items():
    tb_writer.add_scalar("val/loss", val_loss, epoch)

In [20]:
# Validation mIoU has one value per epoch; the epoch number itself is used
# as the TensorBoard step.
for epoch, val_miou in val_mIoU.items():
    tb_writer.add_scalar("val/mIoU", val_miou, epoch)

In [21]:
# Close the writer now that every scalar has been added.
tb_writer.close()