In [43]:
import os
import ipywidgets as widgets
from IPython.display import display, Image, HTML

# Directories holding the source scans and the two sets of transcriptions.
input_folder = '/data/users/pfont/input'
tesseract_folder = '/data/users/pfont/out_tesseract'
llm_folder = '/data/users/pfont/out_llm'

# Base names (extension stripped) of every .jpg file in input_folder, sorted.
# os.path.splitext drops only the final extension, so a name that itself
# contains dots (e.g. 'page.v2.jpg') keeps its full stem instead of being
# truncated at the first dot, which would make later path lookups fail.
file_names = sorted(
    os.path.splitext(entry)[0]
    for entry in os.listdir(input_folder)
    if entry.endswith('.jpg')
)

# Dropdown for choosing which file to inspect.
file_name_dropdown = widgets.Dropdown(
    options=file_names,
    description='File:',
    layout=widgets.Layout(width='50%'),
)

# Output area that display_files renders into.
output_display = widgets.Output()

def display_files(file_name):
    """Show an image next to its Tesseract and LLM text outputs.

    Clears ``output_display`` and renders into it a three-column HTML table:
    the JPEG from ``input_folder`` (embedded as base64) and the contents of
    the same-named ``.txt`` files from ``tesseract_folder`` and
    ``llm_folder``. If one of the three files does not exist, a message
    naming the first missing path is printed instead and nothing else is
    rendered.

    Args:
        file_name: Base name of the file to show, without extension.
    """
    # Imported locally so the function does not rely on a module-level import.
    from html import escape

    with output_display:
        output_display.clear_output()

        image_path = os.path.join(input_folder, file_name + '.jpg')
        tesseract_path = os.path.join(tesseract_folder, file_name + '.txt')
        llm_path = os.path.join(llm_folder, file_name + '.txt')

        if not os.path.exists(image_path):
            print(f"Image not found: {image_path}")
            return

        if not os.path.exists(tesseract_path):
            print(f"Tesseract output not found: {tesseract_path}")
            return

        if not os.path.exists(llm_path):
            print(f"LLM output not found: {llm_path}")
            return

        with open(tesseract_path, 'r', encoding='utf-8') as f:
            tesseract_text = f.read()

        with open(llm_path, 'r', encoding='utf-8') as f:
            llm_text = f.read()

        # The text files are arbitrary text, not markup: escape '<', '>' and
        # '&' so the browser shows them literally instead of parsing them as
        # HTML (which would corrupt or hide parts of the transcription).
        tesseract_html = escape(tesseract_text)
        llm_html = escape(llm_text)

        display(HTML(f"""
        <table style='width:100%; border-collapse: collapse;'>
            <tr>
                <th style='border: 1px solid black; padding: 10px;'>Image</th>
                <th style='border: 1px solid black; padding: 10px;'>Tesseract Output</th>
                <th style='border: 1px solid black; padding: 10px;'>LLM Output</th>
            </tr>
            <tr>
                <td style='border: 1px solid black; padding: 10px; width: 33%;'>
                    <img src='data:image/jpeg;base64,{convert_image_to_base64(image_path)}' style='width: 100%; height: auto;'>
                </td>
                <td style='border: 1px solid black; padding: 10px; width: 33%; max-width: 300px; overflow-wrap: break-word;'>
                    <pre style='white-space: pre-wrap; font-size: 12px; line-height: 1.2em;'>{tesseract_html}</pre>
                </td>
                <td style='border: 1px solid black; padding: 10px; width: 33%; max-width: 300px; overflow-wrap: break-word;'>
                    <pre style='white-space: pre-wrap; font-size: 12px; line-height: 1.2em;'>{llm_html}</pre>
                </td>
            </tr>
        </table>
        """))

def convert_image_to_base64(image_path):
    """Return the bytes of the file at ``image_path`` as a base64 string.

    The file is read in binary mode and the base64 encoding of its full
    contents is returned as a ``str``.
    """
    from base64 import b64encode

    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    encoded = b64encode(raw_bytes)
    return encoded.decode("utf-8")

# Button that triggers rendering of the currently selected file
button = widgets.Button(description="Show Files")


def on_button_click(b):
    """Click handler: display the file currently chosen in the dropdown."""
    selected_name = file_name_dropdown.value
    display_files(selected_name)


button.on_click(on_button_click)

# Show the dropdown, the button and the output area together
display(file_name_dropdown, button, output_display)



Dropdown(description='File:', layout=Layout(width='50%'), options=('rsc37_rsc176_1043_0', 'rsc37_rsc176_1043_1…

Button(description='Show Files', style=ButtonStyle())

Output()