# Notebook 04: Qualitative Comparison of VLM Rationales

**Author:** Antonio Guillen-Perez
**Date:** November 17, 2025

## Objective

This notebook provides an interactive tool to perform a qualitative, side-by-side comparison of the causal rationales generated by two different VLM configurations. The goal is to assess the impact of model precision (4-bit vs. 8-bit) on the quality, factual correctness, and causal insightfulness of the generated text.

**Models Under Comparison:**
- **Model A (Baseline):** `google/gemma-3-12b-it-qat-q4_0-gguf` (4-bit quantization-aware training, QAT)
- **Model B (Upgrade):** `unsloth/gemma-3-12b-it-UD-Q8_K_XL.gguf` (8-bit K-Quant)

Select a `scenario_id` from the dropdown menu to load the corresponding GIF and compare the outputs of the two models.

In [None]:
import os
import sys
from glob import glob
import ipywidgets as widgets
from IPython.display import display, clear_output, Image, HTML

# --- Make the project root importable ---
# The root is taken to be the parent of the current working directory.
PROJECT_ROOT = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir)
)

# Register the root on sys.path only once, so re-running the cell adds no duplicates.
if sys.path.count(PROJECT_ROOT) == 0:
    sys.path.append(PROJECT_ROOT)

print(f"Project Root set to: {PROJECT_ROOT}")

In [None]:
# --- 1. Define Paths to the Rationale and GIF Directories ---

# IMPORTANT: Update these folder names if you used different ones
RATIONALES_DIR_MODEL_A = os.path.join(PROJECT_ROOT, 'outputs', 'rationales_gemma3-12b-Q4')
RATIONALES_DIR_MODEL_B = os.path.join(PROJECT_ROOT, 'outputs', 'rationales_gemma-3-12b-it-UD-Q8_K_XL')
GIF_DIR = os.path.join(PROJECT_ROOT, 'outputs', 'preprocessed_scenarios')

print(f"Model A Path: {RATIONALES_DIR_MODEL_A}")
print(f"Model B Path: {RATIONALES_DIR_MODEL_B}")
print(f"GIF Path: {GIF_DIR}")

# --- 2. Find Common Scenarios Processed by Both Models ---


def _list_scenario_ids(rationales_dir):
    """Return the set of scenario IDs (sub-directory names) in *rationales_dir*.

    Only directories are kept: each rationale is read from
    ``<rationales_dir>/<scenario_id>/rationale.txt``, so stray files such as
    ``.DS_Store`` can never be valid scenarios.

    A missing or non-directory path yields an empty set (and a message)
    instead of raising, so the "no common scenarios" check below can report
    the problem rather than the cell dying with a traceback.
    """
    try:
        entries = os.listdir(rationales_dir)
    except (FileNotFoundError, NotADirectoryError):
        print(f"❌ ERROR: Rationale directory not found: {rationales_dir}")
        return set()
    return {
        entry for entry in entries
        if os.path.isdir(os.path.join(rationales_dir, entry))
    }


# Use sets for an efficient intersection operation
scenarios_a = _list_scenario_ids(RATIONALES_DIR_MODEL_A)
scenarios_b = _list_scenario_ids(RATIONALES_DIR_MODEL_B)

# Sorted so the dropdown order is stable between runs
common_scenarios = sorted(scenarios_a & scenarios_b)

if not common_scenarios:
    print("❌ ERROR: No common scenarios found between the two directories. Please check your paths.")
else:
    print(f"✅ Found {len(common_scenarios)} common scenarios to compare.")

In [4]:
# --- 3. Create the Interactive Dashboard --- 

# Scenario selector: one entry per scenario ID shared by both models
scenario_dropdown = widgets.Dropdown(
    options=common_scenarios,
    description='Scenario ID:',
    style=dict(description_width='initial'),
)

# One Output widget per dashboard pane: the GIF, then each model's rationale
gif_output, rationale_a_output, rationale_b_output = (
    widgets.Output() for _ in range(3)
)
def _show_rationale(output_widget, rationales_dir, scenario_id, heading, model_label):
    """Render one model's rationale for *scenario_id* into *output_widget*.

    Args:
        output_widget: The ``widgets.Output`` pane that receives the content.
        rationales_dir: Directory holding ``<scenario_id>/rationale.txt``.
        scenario_id: Name of the scenario sub-directory to read.
        heading: Text shown in the ``<h3>`` header above the rationale.
        model_label: Model name used in the "not found" message.
    """
    # Local import keeps this block self-contained; the notebook's import
    # cell does not bring in the stdlib ``html`` module.
    import html

    rationale_path = os.path.join(rationales_dir, scenario_id, 'rationale.txt')
    with output_widget:
        if os.path.exists(rationale_path):
            with open(rationale_path, 'r', encoding='utf-8') as f:
                content = f.read()
            # The rationale is free-form model text: escape it so characters
            # like '<', '>' and '&' are shown literally instead of being
            # interpreted as HTML markup and corrupting the pane.
            display(HTML(
                f"<h3>{heading}</h3>"
                "<pre style='white-space: pre-wrap; word-wrap: break-word;'>"
                f"{html.escape(content)}</pre>"
            ))
        else:
            print(f"Rationale not found for {model_label}.")


def display_comparison(change):
    """Refresh the GIF and both rationale panes for the selected scenario.

    Called whenever the dropdown value changes.

    Args:
        change: Change dictionary from the dropdown's ``observe`` callback;
            only ``change['new']`` (the selected scenario ID) is read.
    """
    scenario_id = change['new']
    panes = (gif_output, rationale_a_output, rationale_b_output)

    # No selection (e.g. a dropdown without options): just blank the panes
    # rather than failing on os.path.join(..., None).
    if scenario_id is None:
        for pane in panes:
            pane.clear_output()
        return

    # --- Clear previous outputs ---
    # wait=True defers the clear until new content arrives, avoiding flicker.
    for pane in panes:
        pane.clear_output(wait=True)

    # --- Display the GIF ---
    with gif_output:
        gif_path = os.path.join(GIF_DIR, scenario_id, 'scenario.gif')
        if os.path.exists(gif_path):
            display(Image(filename=gif_path, width=700))
        else:
            print(f"GIF not found for {scenario_id}")

    # --- Display the rationale of each model ---
    _show_rationale(
        rationale_a_output, RATIONALES_DIR_MODEL_A, scenario_id,
        heading="Model A: 4-bit QAT", model_label="Model A",
    )
    _show_rationale(
        rationale_b_output, RATIONALES_DIR_MODEL_B, scenario_id,
        heading="Model B: 8-bit UD K-Quant", model_label="Model B",
    )

# Place the two rationale panes next to each other
comparison_box = widgets.HBox(children=[rationale_a_output, rationale_b_output])

# Re-render the comparison every time the dropdown's value changes
scenario_dropdown.observe(display_comparison, names='value')

# Show the whole dashboard: selector on top, GIF in the middle, rationales below
display(widgets.VBox(children=[scenario_dropdown, gif_output, comparison_box]))

# --- Render the first scenario right away on initial load ---
if common_scenarios:
    first_scenario = common_scenarios[0]
    display_comparison({'new': first_scenario})

VBox(children=(Dropdown(description='Scenario ID:', options=('20d181afd1d5865b', '352093c6bb741b97', '3c082b48…