# Installation

In [5]:
%%capture
# Use the %pip magic (not !pip) so packages are installed into the environment
# of the running kernel rather than whichever pip is first on the shell PATH.
# numpy and matplotlib are listed because the import cell below needs them.
%pip install tensorboard ipykernel numpy matplotlib

# Importing the libraries

In [6]:
import os
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing import event_accumulator
from matplotlib.ticker import FuncFormatter

# Formatting the results

In [7]:
def smooth_data(data, smoothing_factor=0.9):
    """Return an exponentially smoothed copy of ``data`` as a list.

    The first sample is kept unchanged. Every later sample is blended with
    the previous smoothed value:
    ``smoothing_factor * previous + (1 - smoothing_factor) * current``.
    An empty input yields an empty list.
    """
    keep_weight = smoothing_factor
    new_weight = 1 - smoothing_factor

    result = []
    for sample in data:
        if result:
            # Blend the running average with the new sample.
            result.append(keep_weight * result[-1] + new_weight * sample)
        else:
            # Nothing to blend with yet: seed the average with the first sample.
            result.append(sample)
    return result

def format_steps_in_millions(x, pos):
    """Tick formatter: render a raw step count as millions, e.g. 2500000 -> '2.5M'.

    ``pos`` is the tick position that matplotlib's FuncFormatter passes in;
    it is not used.
    """
    millions = x / 1e6
    return "{:.1f}M".format(millions)

Converts TensorBoard logs to high-quality images.

Parameters:
- log_dir (str): Directory containing the TensorBoard log subdirectories.
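- names (list of str): Display names used in the plot legend, one per log subdirectory. Names are matched to the subdirectories in sorted (alphabetical) order, so list them in that order.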
- output_dir (str): Directory where the images will be saved.
- smoothing_factor (float, optional): Smoothing factor for exponential moving average. Default is 0.9.
- max_steps (int, optional): Maximum number of steps to include in the plots. Default is 20,000,000.
- set_max_point (bool, optional): Whether to mark the maximum of each smoothed curve with a red dot. Default is True.

Raises:
- ValueError: If the number of names provided is less than the number of log subdirectories.

This function processes TensorBoard event files, aggregates scalar data, applies smoothing, and generates
high-resolution plots for each metric. The plots are saved as PDF files in the specified output directory.

In [8]:
def tensorboard_to_images(log_dir, names, output_dir, smoothing_factor=0.9, max_steps=20_000_000, set_max_point=True):
    """Render every scalar tag found under ``log_dir`` as one PDF plot per tag.

    Each immediate subdirectory of ``log_dir`` is treated as one method/run.
    All ``events.out.tfevents`` files below it are read, their scalar events
    are merged per tag, sorted by step, smoothed with ``smooth_data`` and
    drawn as one line per method.

    Parameters:
    - log_dir (str): Directory containing the TensorBoard log subdirectories.
    - names (list of str): Legend names, matched to the subdirectories in
      sorted order. Extra names are ignored.
    - output_dir (str): Directory where the PDFs are written (created if missing).
    - smoothing_factor (float, optional): EMA factor passed to ``smooth_data``.
    - max_steps (int, optional): Events with a step above this are dropped.
    - set_max_point (bool, optional): Mark the maximum of each smoothed curve
      with a red dot.

    Raises:
    - ValueError: If fewer names than log subdirectories are provided.

    A run/tag with no events at or below ``max_steps`` is skipped instead of
    producing an empty curve.
    """
    os.makedirs(output_dir, exist_ok=True)

    # aggregated_data[tag][method_name] -> list of (steps, values), one entry per event file
    aggregated_data = defaultdict(lambda: defaultdict(list))

    # Immediate subdirectories of log_dir, in sorted order
    run_dirs = sorted(
        os.path.join(log_dir, entry)
        for entry in os.listdir(log_dir)
        if os.path.isdir(os.path.join(log_dir, entry))
    )

    # Map subdirectories to names
    if len(run_dirs) > len(names):
        raise ValueError("Not enough names provided for the log directories.")
    legend_names = {os.path.basename(run_dir): name for run_dir, name in zip(run_dirs, names)}

    # Echo the mapping so a mis-ordered `names` list is easy to spot
    for run_dir, name in zip(run_dirs, names):
        print(f"log_dir: {run_dir} and names: {name}")

    for run_dir in run_dirs:
        print(f"Processing {run_dir}...")
        method_name = legend_names[os.path.basename(run_dir)]
        for subdir, _, files in os.walk(run_dir):
            for file in files:
                if "events.out.tfevents" not in file:
                    continue
                # NOTE(review): EventAccumulator is created with its default size
                # guidance, which may subsample very long scalar series - confirm
                # this is acceptable for the runs being plotted.
                ea = event_accumulator.EventAccumulator(os.path.join(subdir, file))
                ea.Reload()

                for tag in ea.Tags()['scalars']:
                    # Keep only the events at or below max_steps
                    kept = [(e.step, e.value) for e in ea.Scalars(tag) if e.step <= max_steps]
                    if not kept:
                        # Nothing to plot for this file/tag; an empty series would
                        # otherwise break the max/argmax lookup further down.
                        continue
                    trimmed_steps = [step for step, value in kept]
                    trimmed_values = [value for step, value in kept]
                    aggregated_data[tag][method_name].append((trimmed_steps, trimmed_values))

    # Create a separate plot for each metric (tag)
    for tag, methods in aggregated_data.items():
        fig, ax = plt.subplots(figsize=(10, 6), dpi=300)  # High-resolution output for publication

        # Plot all methods for this metric
        for method_name, data in methods.items():
            all_steps = []
            all_values = []
            for steps, values in data:
                all_steps.extend(steps)
                all_values.extend(values)

            # Sort by steps; a stable sort keeps duplicate steps in a deterministic order
            order = np.argsort(all_steps, kind="stable")
            sorted_steps = np.asarray(all_steps)[order]
            sorted_values = np.asarray(all_values)[order]

            # Smooth values
            smoothed_values = smooth_data(sorted_values, smoothing_factor)

            # Plot the smoothed data
            ax.plot(sorted_steps, smoothed_values, label=f"{method_name}", linewidth=2)

            # Highlight the maximum value with a red dot (exclude from legend)
            if set_max_point:
                max_index = int(np.argmax(smoothed_values))
                ax.scatter(
                    sorted_steps[max_index],
                    smoothed_values[max_index],
                    color='red',
                    zorder=5,
                    label='_nolegend_',
                )

        # Enhance plot aesthetics
        ax.set_xlabel('Steps (in Millions)', fontsize=14)
        ax.set_ylabel('Values', fontsize=14)
        ax.set_title(f'TensorBoard Scalars - {tag}', fontsize=16)
        ax.legend(fontsize=12)
        ax.grid(True, linestyle='--', alpha=0.6)

        # Format x-axis labels to show steps in millions
        ax.xaxis.set_major_formatter(FuncFormatter(format_steps_in_millions))

        # Save each plot as a vector-based PDF; '/' in a tag would otherwise create subfolders
        output_image_name = f"{tag.replace('/', '_')}.pdf"
        fig.savefig(os.path.join(output_dir, output_image_name), format='pdf', bbox_inches='tight')
        plt.close(fig)

In [12]:
# Example usage
log_dir = "results_all"
# One legend name per subdirectory of log_dir, listed in the sorted order of
# the subdirectory names (the function prints the resulting mapping).
names = [
    "Forward and Backward (0.0003)",
    "Forward and Backward (0.001)",
    "Only Forward (0.0003)",
    "Only Forward (0.001)",
    "Sound and Forward (0.0003)",
    "Sound and Forward (0.001)",
]
output_dir = "combined_results"
tensorboard_to_images(
    log_dir,
    names,
    output_dir,
    smoothing_factor=0.8,
    max_steps=20_000_000,
    set_max_point=False,
)

log_dir: results_all/F&B_0.0003 and names: Forward and Backward (0.0003)
log_dir: results_all/F&B_0.001 and names: Forward and Backward (0.001)
log_dir: results_all/F_0.0003 and names: Only Forward (0.0003)
log_dir: results_all/F_0.001 and names: Only Forward (0.001)
log_dir: results_all/S_0.0003 and names: Sound and Forward (0.0003)
log_dir: results_all/S_0.001 and names: Sound and Forward (0.001)
Processing results_all/F&B_0.0003...
Processing results_all/F&B_0.001...
Processing results_all/F_0.0003...
Processing results_all/F_0.001...
Processing results_all/S_0.0003...
Processing results_all/S_0.001...
