## 1. Backend Initialization

This cell sets up the S3 backend connection and credential management for accessing ECHO S3 storage.

**Key Functions:**
- `read_json_creds()`: Read credentials from JSON file
- `load_credentials()`: Load credentials from JSON file or environment variables  
- `make_s3_client()`: Create boto3 S3 client with ECHO endpoint configuration
- Automatically lists available buckets after initialization

**Output:** 
- Displays the first 6 characters of the loaded access key (the secret key is never printed)
- Lists all available S3 buckets

In [None]:
# Cell 1: Backend initialization 
import os, json
from botocore.exceptions import NoCredentialsError
import boto3
from botocore.config import Config

# S3-compatible endpoint that every client in this notebook connects to.
ECHO_ENDPOINT = "https://s3.echo.stfc.ac.uk"
# Credentials file tried first by load_credentials() when no explicit path is given.
# NOTE(review): this is an absolute, user-specific path, so expanduser() has nothing
# to expand here — consider a "~"-relative path or an environment variable.
DEFAULT_JSON = os.path.expanduser("/home/shun/Project/S3/echo_creds.json")

def read_json_creds(path):
    """Load an access/secret key pair from a JSON credentials file.

    Two layouts are accepted, checked in this order:
    ``access_key``/``secret_key`` and
    ``aws_access_key_id``/``aws_secret_access_key``.

    Args:
        path: Location of the JSON file; a leading ``~`` is expanded.

    Returns:
        A dict with exactly the keys ``'access_key'`` and ``'secret_key'``.

    Raises:
        KeyError: If neither key layout is fully present in the file.
    """
    with open(os.path.expanduser(path), 'r') as handle:
        payload = json.load(handle)

    # (access field, secret field) pairs, in priority order.
    accepted_layouts = (
        ('access_key', 'secret_key'),
        ('aws_access_key_id', 'aws_secret_access_key'),
    )
    for access_field, secret_field in accepted_layouts:
        if access_field in payload and secret_field in payload:
            return {
                'access_key': payload[access_field],
                'secret_key': payload[secret_field],
            }

    raise KeyError("Invalid credential format")

def load_credentials(json_path=None):
    """Resolve S3 credentials from a JSON file or from environment variables.

    Lookup order:
      1. ``json_path`` when given; any error reading it propagates to the caller.
      2. ``DEFAULT_JSON``; a missing, unreadable or malformed file is skipped.
      3. ``AWS_ACCESS_KEY_ID``/``AWS_ACCESS_KEY`` together with
         ``AWS_SECRET_ACCESS_KEY``/``AWS_SECRET_KEY`` from the environment.

    Args:
        json_path: Optional explicit path to a credentials JSON file.

    Returns:
        A dict with the keys ``'access_key'`` and ``'secret_key'``.

    Raises:
        NoCredentialsError: If none of the sources yields a complete key pair.
    """
    if json_path:
        return read_json_creds(json_path)

    try:
        return read_json_creds(DEFAULT_JSON)
    except (OSError, ValueError, KeyError, TypeError):
        # Best-effort lookup: the default file may be absent (OSError), not valid
        # JSON (ValueError), or not in a recognised layout (KeyError/TypeError).
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        pass

    ak = os.environ.get('AWS_ACCESS_KEY_ID') or os.environ.get('AWS_ACCESS_KEY')
    sk = os.environ.get('AWS_SECRET_ACCESS_KEY') or os.environ.get('AWS_SECRET_KEY')
    if ak and sk:
        return {'access_key': ak, 'secret_key': sk}

    # botocore exceptions accept keyword arguments only; passing a positional
    # message would raise TypeError instead of the intended NoCredentialsError.
    raise NoCredentialsError()

def make_s3_client(creds):
    """Build a boto3 S3 client bound to the ECHO endpoint.

    Args:
        creds: Mapping with the keys ``'access_key'`` and ``'secret_key'``.

    Returns:
        The client object returned by ``boto3.client('s3', ...)``, configured
        with path-style addressing.
    """
    path_style = Config(s3={'addressing_style': 'path'})
    client_options = {
        'endpoint_url': ECHO_ENDPOINT,
        'aws_access_key_id': creds['access_key'],
        'aws_secret_access_key': creds['secret_key'],
        'config': path_style,
    }
    return boto3.client('s3', **client_options)

# Resolve credentials and build the shared client; `creds`, `s3` and `buckets`
# are module-level names that the later cells read.
creds = load_credentials()
s3 = make_s3_client(creds)
# Only a 6-character prefix of the access key is echoed; the secret key is not printed.
print(f"‚úÖ Credentials loaded: {creds['access_key'][:6]}...")

# Listing the buckets doubles as a connectivity check for the new client.
resp = s3.list_buckets()
buckets = [b['Name'] for b in resp.get('Buckets', [])]
print(f"‚úÖ Found {len(buckets)} buckets: {buckets}")

‚úÖ Credentials loaded: P0F9SE...
‚úÖ Found 4 buckets: ['bozkurt-abd03e71', 'tcn-livex-test', 'tensorstore-test', 'xray-data']


## 2. Core Logic Functions

This cell defines core utility functions for S3 operations and data management.

**Key Functions:**
- `format_size()`: Convert bytes to human-readable format (B, KB, MB, GB, TB, PB)
- `format_time()`: Format datetime objects to readable strings (YYYY-MM-DD HH:MM)
- `refresh_buckets()`: Refresh the list of available S3 buckets
- `list_objects()`: List objects from S3 bucket with pagination (max 5000 objects) and extract directory structure
- `filter_and_sort_objects()`: Filter objects by directory/keyword and apply sorting (by name, size, or date)
- `download_objects()`: Download selected objects to local directory with automatic filename conflict resolution

**Output:** Confirmation message that core functions are loaded

In [9]:
# Cell 2: Core logic functions (no UI)
import pandas as pd
from collections import Counter

# Upper bound on how many objects list_objects() collects from one listing.
MAX_LIST_OBJECTS = 5000
# Result of the most recent list_objects() call: dicts with 'Key', 'Size', 'LastModified'.
objects_cache = []
# Sorted parent-directory paths derived from the keys in objects_cache.
all_directories = []

def format_size(size_bytes):
    """Render a byte count with two decimals and a binary (1024-based) unit.

    The value is divided by 1024 until it drops below 1024 or the units up to
    TB are exhausted; anything larger is reported in PB.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    value = size_bytes
    position = 0
    while position < len(units):
        if value < 1024.0:
            return f"{value:.2f} {units[position]}"
        value = value / 1024.0
        position += 1
    return f"{value:.2f} PB"

def format_time(dt):
    """Return ``dt`` as ``YYYY-MM-DD HH:MM``, or ``"N/A"`` when ``dt`` is None."""
    return "N/A" if dt is None else dt.strftime("%Y-%m-%d %H:%M")

def refresh_buckets():
    """Reload credentials, rebuild the S3 client and re-read the bucket list.

    Rebinds the module-level ``creds``, ``s3`` and ``buckets`` names.

    Returns:
        The list of bucket names reported by the new client.
    """
    global s3, creds, buckets

    creds = load_credentials()
    s3 = make_s3_client(creds)

    listing = s3.list_buckets()
    buckets = [entry['Name'] for entry in listing.get('Buckets', [])]
    return buckets

def list_objects(bucket, prefix=''):
    """Collect objects from a bucket and derive the parent directory of each key.

    Listing stops once ``MAX_LIST_OBJECTS`` objects have been collected. The
    results replace the module-level ``objects_cache`` and ``all_directories``.

    Args:
        bucket: Name of the bucket to list.
        prefix: Key prefix passed to the listing call ('' lists everything).

    Returns:
        ``(objects, directories)``: a list of dicts with the keys 'Key', 'Size'
        and 'LastModified', and the sorted list of parent-directory paths.
    """
    global objects_cache, all_directories

    pages = s3.get_paginator('list_objects_v2').paginate(
        Bucket=bucket,
        Prefix=prefix,
        PaginationConfig={'PageSize': 1000},
    )

    collected = []
    for page in pages:
        for entry in page.get('Contents', []):
            record = {
                'Key': entry['Key'],
                'Size': entry['Size'],
                'LastModified': entry.get('LastModified'),
            }
            collected.append(record)
            if len(collected) >= MAX_LIST_OBJECTS:
                break
        # Re-check after the inner loop so that paging stops as well.
        if len(collected) >= MAX_LIST_OBJECTS:
            break

    objects_cache = collected

    # Everything before the last '/' is the parent directory; keys that contain
    # no '/' contribute none.
    parent_dirs = {
        record['Key'].rsplit('/', 1)[0]
        for record in collected
        if '/' in record['Key']
    }
    all_directories = sorted(parent_dirs)

    return collected, all_directories

def filter_and_sort_objects(directory='(All)', keyword='', sort_by='name_asc'):
    """Return the cached objects that match the filters, in the requested order.

    Args:
        directory: Keep only keys that start with ``directory + '/'``; the
            sentinel ``'(All)'`` disables this filter.
        keyword: Case-insensitive substring that the key must contain; an empty
            string disables this filter.
        sort_by: One of ``name_asc``, ``name_desc``, ``size_asc``, ``size_desc``,
            ``date_asc``, ``date_desc``. Any other value leaves the cache order.

    Returns:
        A new list holding the matching entries of ``objects_cache`` (the cache
        itself is not modified). When sorting by date, entries whose
        ``LastModified`` is None are placed after all dated entries instead of
        raising ``TypeError`` from a None/datetime comparison.
    """
    if not objects_cache:
        return []

    selected = list(objects_cache)

    if directory != '(All)':
        dir_prefix = directory + '/'
        selected = [obj for obj in selected if obj['Key'].startswith(dir_prefix)]

    if keyword:
        needle = keyword.lower()
        selected = [obj for obj in selected if needle in obj['Key'].lower()]

    # sort_by value -> (field to sort on, descending?)
    sort_modes = {
        'name_asc': ('Key', False),
        'name_desc': ('Key', True),
        'size_asc': ('Size', False),
        'size_desc': ('Size', True),
        'date_asc': ('LastModified', False),
        'date_desc': ('LastModified', True),
    }
    mode = sort_modes.get(sort_by)
    if mode is None:
        return selected

    field, descending = mode
    if field == 'LastModified':
        # list_objects() stores None when the listing has no timestamp; sort
        # only the dated entries and append the undated ones at the end.
        dated = [obj for obj in selected if obj.get('LastModified') is not None]
        undated = [obj for obj in selected if obj.get('LastModified') is None]
        dated.sort(key=lambda obj: obj['LastModified'], reverse=descending)
        return dated + undated

    selected.sort(key=lambda obj: obj[field], reverse=descending)
    return selected

def download_objects(bucket, object_indices, dest_dir):
    """Download the selected cached objects into a local directory.

    Each object is saved under the basename of its key (``'object'`` when the
    basename is empty). If a file of that name already exists, ``(1)``, ``(2)``,
    ... is inserted before the extension until the name is free.

    Args:
        bucket: Bucket that the objects are downloaded from.
        object_indices: Iterable of indices into ``objects_cache``.
        dest_dir: Target directory; created if it does not exist.

    Returns:
        A list of ``(key, error message)`` tuples, one per failed object. When
        the cache lookup itself fails, the first element is ``'<index N>'``.
    """
    os.makedirs(dest_dir, exist_ok=True)

    failed = []
    for cache_idx in object_indices:
        # Reset the label on every iteration: if the lookup below raises, the
        # failure must not be attributed to an unbound or previous key.
        label = f"<index {cache_idx}>"
        try:
            key = objects_cache[cache_idx]['Key']
            label = key

            fname = os.path.basename(key) or 'object'
            local = os.path.join(dest_dir, fname)

            # Never overwrite an existing file: probe name(1).ext, name(2).ext, ...
            base, ext = os.path.splitext(local)
            n = 1
            while os.path.exists(local):
                local = f"{base}({n}){ext}"
                n += 1

            s3.download_file(bucket, key, local)
        except Exception as e:
            # Best effort: record the failure and continue with the next object.
            failed.append((label, str(e)))

    return failed

# Reached only if every definition above executed without raising.
print("‚úÖ Core functions loaded")

‚úÖ Core functions loaded


## 3. Visualization Functions

This cell provides HDF5 file visualization capabilities for X-ray imaging data.

**Key Functions:**
- `get_h5_files()`: Extract list of .h5 files from cached S3 objects
- `visualize_h5_from_s3()`: Load and visualize the last frame of an HDF5 file directly from S3 without downloading

**Processing Pipeline:**
- Loads HDF5 file from S3 using s3fs
- Extracts the last frame from specified dataset (default: 'narrowfov')
- Applies histogram equalization for better visualization
- Displays frame in large format (15x15 inches)

**Dependencies:** s3fs, h5py, hdf5plugin, scikit-image, matplotlib

**Output:** Confirmation message that visualization functions are loaded

In [18]:
# Cell 3: Visualization functions (last frame only, 15x15 inch figure)
import matplotlib.pyplot as plt
import numpy as np
from skimage import exposure
import gc

try:
    import s3fs
    import h5py
    import hdf5plugin
except ImportError as e:
    print("Missing dependencies. Please install:")
    print("  pip install s3fs h5py hdf5plugin scikit-image")
    raise e

def get_h5_files():
    """Return ``(key, cache index)`` pairs for cached objects whose key ends in '.h5'."""
    if not objects_cache:
        return []

    matches = []
    for position, record in enumerate(objects_cache):
        name = record['Key']
        if name.endswith('.h5'):
            matches.append((name, position))
    return matches

def visualize_h5_from_s3(bucket, file_key, dataset_name='narrowfov'):
    """Show the last frame of an HDF5 dataset read directly from S3.

    The frame is min-max scaled to 8 bits, histogram-equalised and drawn in a
    15x15 inch figure. Credentials and endpoint come from the module-level
    ``creds`` and ``ECHO_ENDPOINT``.

    Args:
        bucket: Bucket that holds the file.
        file_key: Object key of the ``.h5`` file.
        dataset_name: Name of the dataset inside the file; it is indexed along
            its first axis to select a frame.

    Returns:
        ``(total_frames, last_frame_idx)``.

    Raises:
        KeyError: If ``dataset_name`` is not present in the file.
        ValueError: If the dataset contains no frames.
    """
    plt.close('all')

    s3fs_client = s3fs.S3FileSystem(
        key=creds['access_key'],
        secret=creds['secret_key'],
        client_kwargs={'endpoint_url': ECHO_ENDPOINT}
    )

    s3_path = f"{bucket}/{file_key}"

    with s3fs_client.open(s3_path, 'rb') as f:
        with h5py.File(f, 'r') as hf:
            if dataset_name not in hf:
                available = list(hf.keys())
                raise KeyError(f"Dataset '{dataset_name}' not found. Available: {available}")

            dataset = hf[dataset_name]
            total_frames = dataset.shape[0]
            if total_frames < 1:
                # Without this guard the index below would be -1.
                raise ValueError(f"Dataset '{dataset_name}' contains no frames")

            # Read last frame
            last_frame_idx = total_frames - 1
            frame = dataset[last_frame_idx]

    # Min-max scale to 8 bits. A constant frame has zero range, which would
    # divide by zero and yield NaNs, so it is rendered as all zeros instead.
    lo = frame.min()
    hi = frame.max()
    if hi > lo:
        frame_8bit = ((frame - lo) / (hi - lo) * 255).astype(np.uint8)
    else:
        frame_8bit = np.zeros(frame.shape, dtype=np.uint8)
    frame_eq = exposure.equalize_hist(frame_8bit)

    # 15x15 inch figure
    fig, ax = plt.subplots(1, 1, figsize=(15, 15))

    ax.imshow(frame_eq, cmap='gray')
    ax.set_title(f'Frame {last_frame_idx} (Last Frame)', fontsize=20)
    ax.axis('off')

    plt.tight_layout()
    plt.show()

    # Release the figure explicitly; repeated calls would otherwise keep every
    # rendered frame alive.
    plt.close(fig)
    gc.collect()

    return total_frames, last_frame_idx

# Reached only if the optional dependencies imported and the definitions above ran.
print("‚úÖ Visualization functions loaded")

‚úÖ Visualization functions loaded


## 4. S3 Browser UI (ipywidgets)

This cell creates an interactive S3 object browser using ipywidgets.

**UI Components:**

**Row 1 - Bucket & List Controls:**
- Bucket selector dropdown
- Refresh buckets button
- Prefix input (filters the listing by key prefix; may be left blank)
- List objects button

**Row 2 - Filter Controls:**
- Directory filter dropdown
- Keyword search text input
- Sort options (by name, size, or date - ascending/descending)

**Row 3 - Object List:**
- Multi-select list showing: index, key, size, last modified time

**Row 4 - Actions:**
- Local directory input
- Download selected button
- Dataset name input (for h5 files)
- Visualize last frame button (for h5 files)

**Features:**
- Real-time filtering and sorting
- Automatic directory structure extraction
- Multi-file selection and batch download
- Direct H5 visualization from S3
- Status messages with success/error indicators

**Output:** Interactive UI with status messages

In [19]:
# Cell 4: Complete UI (update button text)
import ipywidgets as widgets
from IPython.display import display, clear_output

# Status outputs
main_status = widgets.Output(
    layout={'border': '1px solid gray', 'max_height': '120px', 'overflow': 'auto'},
)


def set_main_status(msg, error=False):
    """Replace the content of the main status box with a single message.

    Args:
        msg: Text to show.
        error: When true, the error marker is prepended instead of the info marker.
    """
    marker = "‚ùå " if error else "‚ÑπÔ∏è "
    with main_status:
        clear_output(wait=True)
        print(marker + msg)

# ============ S3 Browser Section ============
print("=== S3 Object Browser & Visualization ===")

# Row 1: Bucket and list controls
# `buckets` is the module-level list produced by the backend initialization cell.
buckets_dropdown = widgets.Dropdown(options=buckets, description='Bucket:')
refresh_btn = widgets.Button(description='Refresh Buckets', button_style='info')
prefix_text = widgets.Text(value='', placeholder='e.g. results/', description='Prefix:')
list_btn = widgets.Button(description='List Objects', button_style='primary')

# Row 2: Filter controls
# '(All)' is the sentinel that filter_and_sort_objects() treats as "no directory filter".
dir_dropdown = widgets.Dropdown(options=['(All)'], value='(All)', description='Directory:')
filter_text = widgets.Text(value='', placeholder='e.g. narrowfov', description='Keyword:')
# The second element of each option is the `sort_by` string understood by
# filter_and_sort_objects(); keep the two in sync.
sort_dropdown = widgets.Dropdown(
    options=[
        ('Name A-Z', 'name_asc'),
        ('Name Z-A', 'name_desc'),
        ('Size Small-Large', 'size_asc'),
        ('Size Large-Small', 'size_desc'),
        ('Date New-Old', 'date_desc'),
        ('Date Old-New', 'date_asc')
    ],
    value='name_asc',
    description='Sort:'
)

# Objects display
# Options are filled by apply_filters() as (label, index into objects_cache) pairs,
# so `objects_select.value` is a tuple of cache indices.
objects_select = widgets.SelectMultiple(
    options=[], 
    rows=20,
    description='Objects', 
    layout=widgets.Layout(width='100%')
)

# Action controls
dest_text = widgets.Text(value='./downloads', description='Local Dir:', layout=widgets.Layout(width='25%'))
download_btn = widgets.Button(description='Download Selected', button_style='success')
dataset_text = widgets.Text(value='narrowfov', description='Dataset:', placeholder='e.g. narrowfov', layout=widgets.Layout(width='25%'))
viz_btn = widgets.Button(description='Visualize Last Frame', button_style='warning')

# Visualization output
viz_output = widgets.Output()

# Layout
controls_row1 = widgets.HBox([buckets_dropdown, refresh_btn, prefix_text, list_btn])
controls_row2 = widgets.HBox([dir_dropdown, filter_text, sort_dropdown])
actions_row = widgets.HBox([dest_text, download_btn, dataset_text, viz_btn])

# Render top to bottom: controls, object list, actions, status box, figure area.
display(controls_row1)
display(controls_row2)
display(objects_select)
display(actions_row)
display(main_status)
display(viz_output)

# ============ Event Handlers ============

def apply_filters():
    """Rebuild the object list from the current directory/keyword/sort widgets.

    Each option is a ``(label, index into objects_cache)`` pair, so the
    selection keeps referring to cache positions whatever the display order.
    """
    filtered = filter_and_sort_objects(
        directory=dir_dropdown.value,
        keyword=filter_text.value.strip(),
        sort_by=sort_dropdown.value
    )

    if not filtered:
        objects_select.options = []
        set_main_status("No objects match current filters", error=True)
        return

    # Map each key to its first cache position once. Calling
    # objects_cache.index() per row is quadratic, and this handler runs on
    # every keystroke in the keyword box.
    cache_position = {}
    for idx, cached in enumerate(objects_cache):
        cache_position.setdefault(cached['Key'], idx)

    # Update objects list
    opts = [
        (
            f"{row}: {obj['Key']} ({format_size(obj['Size'])}) [{format_time(obj['LastModified'])}]",
            cache_position[obj['Key']],
        )
        for row, obj in enumerate(filtered)
    ]
    objects_select.options = opts

    set_main_status(f"Showing {len(filtered)} objects (from {len(objects_cache)} total)")

def refresh_buckets_action(_=None):
    """Button handler: reload the bucket list and select its first entry.

    Any failure is reported in the status box rather than raised.
    """
    global buckets

    set_main_status("Refreshing buckets...")
    try:
        buckets = refresh_buckets()
        buckets_dropdown.options = buckets
        if len(buckets) > 0:
            buckets_dropdown.value = buckets[0]
        set_main_status(f"Found {len(buckets)} buckets")
    except Exception as exc:
        set_main_status(f"Refresh failed: {exc}", error=True)

def list_objects_action(_=None):
    """Button handler: list the selected bucket/prefix and refresh the browser.

    On success the directory dropdown is rebuilt and the filters are re-applied.
    On failure the error is shown in the status box and then re-raised.
    """
    bucket = buckets_dropdown.value
    prefix = prefix_text.value.strip()

    set_main_status(f"Listing objects in bucket={bucket} prefix={prefix}...")
    objects_select.options = []

    try:
        listed, directories = list_objects(bucket, prefix)

        if not listed:
            set_main_status("No objects found", error=True)
            return

        # Update directory dropdown
        dir_dropdown.options = ['(All)'] + directories
        dir_dropdown.value = '(All)'

        set_main_status(f"Loaded {len(listed)} objects from {len(directories)} directories")
        apply_filters()

    except Exception as exc:
        set_main_status(f"List failed: {exc}", error=True)
        raise

def download_selected_action(_=None):
    """Button handler: download every selected object into the chosen directory.

    Per-object failures returned by download_objects() are listed in the status
    box; an exception from the call itself is reported there as well.
    """
    if not objects_cache:
        set_main_status("No objects cached, please list first", error=True)
        return

    chosen = list(objects_select.value)
    if not chosen:
        set_main_status("No objects selected", error=True)
        return

    target_dir = dest_text.value.strip() or './downloads'
    bucket = buckets_dropdown.value

    set_main_status(f"Downloading {len(chosen)} objects to {target_dir}...")

    try:
        failures = download_objects(bucket, chosen, target_dir)

        if not failures:
            set_main_status("All downloads completed ‚úÖ")
        else:
            set_main_status(f"Download completed with {len(failures)} failures", error=True)
            # Append the details below the summary line.
            with main_status:
                print("Failed:")
                for failed_key, reason in failures:
                    print(f"  - {failed_key}: {reason}")
    except Exception as exc:
        set_main_status(f"Download failed: {exc}", error=True)

def visualize_h5_action(_=None):
    """Button handler: render the last frame of the first selected object.

    The first selected entry must be an ``.h5`` key and a dataset name must be
    given; otherwise an error is shown and nothing is loaded. Exceptions from
    the visualization are reported in the status box together with a traceback.
    """
    import traceback

    if not objects_cache:
        set_main_status("No objects cached, please list first", error=True)
        return

    chosen = list(objects_select.value)
    if not chosen:
        set_main_status("No objects selected", error=True)
        return

    # Only the first selected entry is considered.
    file_key = objects_cache[chosen[0]]['Key']

    if not file_key.endswith('.h5'):
        set_main_status(f"Selected file is not h5: {file_key}", error=True)
        return

    if len(chosen) > 1:
        set_main_status(f"Multiple files selected, visualizing first h5 file: {file_key}")

    bucket = buckets_dropdown.value
    dataset_name = dataset_text.value.strip()

    if not dataset_name:
        set_main_status("Please specify dataset name (e.g. narrowfov)", error=True)
        return

    set_main_status(f"Loading {file_key} from S3...")
    viz_output.clear_output()

    try:
        # Route the figure into the dedicated output area.
        with viz_output:
            frame_total, shown_idx = visualize_h5_from_s3(bucket, file_key, dataset_name)

        set_main_status(f"‚úÖ Visualized {file_key}: frame {shown_idx} (total: {frame_total} frames)")

    except Exception as exc:
        set_main_status(f"Visualization failed: {exc}", error=True)
        with main_status:
            traceback.print_exc()

# Bind events
refresh_btn.on_click(refresh_buckets_action)
list_btn.on_click(list_objects_action)
download_btn.on_click(download_selected_action)
viz_btn.on_click(visualize_h5_action)

# Real-time filter updates
# The change payload is ignored; apply_filters() re-reads all three widgets itself.
dir_dropdown.observe(lambda _: apply_filters(), names='value')
filter_text.observe(lambda _: apply_filters(), names='value')
sort_dropdown.observe(lambda _: apply_filters(), names='value')

set_main_status("UI ready. Select bucket/prefix and click 'List Objects'")

=== S3 Object Browser & Visualization ===


HBox(children=(Dropdown(description='Bucket:', options=('bozkurt-abd03e71', 'tcn-livex-test', 'tensorstore-tes‚Ä¶

HBox(children=(Dropdown(description='Directory:', options=('(All)',), value='(All)'), Text(value='', descripti‚Ä¶

SelectMultiple(description='Objects', layout=Layout(width='100%'), options=(), rows=20, value=())

HBox(children=(Text(value='./downloads', description='Local Dir:', layout=Layout(width='25%')), Button(button_‚Ä¶

Output(layout=Layout(border_bottom='1px solid gray', border_left='1px solid gray', border_right='1px solid gra‚Ä¶

Output()

## 5. H5 Frame Browser

This cell provides an interactive frame-by-frame browser for HDF5 files on S3.

**UI Components:**

**Initialization:**
- "Load Selected H5 File" button - loads file info from Cell 4's selection

**Navigation Controls:**
- Frame slider - scrub through frames smoothly
- Frame number input - jump to specific frame
- First/Previous/Next/Last buttons - frame navigation
- Frame info display - shows file name, dataset, total frames

**Features:**
- Loads file metadata without downloading entire file
- Displays frames with histogram equalization
- Interactive slider for smooth browsing
- Large visualization (12x12 inches)
- Real-time frame loading from S3

**Requirements:**
- Must run Cell 4 first and select an .h5 file

**Output:** Interactive frame browser with large frame display

In [None]:
# Cell 5: Frame Browser - Browse individual frames with slider
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
from skimage import exposure

# Status and display
browser_status = widgets.Output(
    layout={'border': '1px solid gray', 'padding': '5px'},
)
browser_output = widgets.Output()


def set_browser_status(msg, error=False):
    """Replace the content of the frame-browser status box with one message.

    Args:
        msg: Text to show.
        error: When true, the error marker is prepended instead of the info marker.
    """
    marker = "‚ùå " if error else "‚ÑπÔ∏è "
    with browser_status:
        clear_output(wait=True)
        print(marker + msg)

# Global state
# Written by initialize_browser() and read by display_frame() and the navigation handlers.
current_file_key = None
current_bucket = None
current_dataset = None
total_frames_count = 0

print("=== H5 Frame Browser ===")

# Initialize button
init_btn = widgets.Button(description='Load Selected H5 File', button_style='primary', icon='download')

# Frame controls (initially disabled)
# The range starts as 0..0; initialize_browser() sets `max` once the frame count is known.
frame_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=0,
    step=1,
    description='Frame:',
    continuous_update=False,  # Only update on mouse release
    layout=widgets.Layout(width='60%'),
    disabled=True
)

# Free-form frame number entry; display_frame() rejects out-of-range indices.
frame_input = widgets.IntText(
    value=0,
    description='Go to:',
    layout=widgets.Layout(width='150px'),
    disabled=True
)

# Navigation buttons
first_btn = widgets.Button(description='First', button_style='', icon='fast-backward', disabled=True)
prev_btn = widgets.Button(description='Previous', button_style='', icon='step-backward', disabled=True)
next_btn = widgets.Button(description='Next', button_style='', icon='step-forward', disabled=True)
last_btn = widgets.Button(description='Last', button_style='', icon='fast-forward', disabled=True)

# Frame info display
frame_info = widgets.HTML(value='<b>No file loaded</b>')

# Layout
control_row1 = widgets.HBox([init_btn])
control_row2 = widgets.HBox([frame_slider])
control_row3 = widgets.HBox([first_btn, prev_btn, frame_input, next_btn, last_btn, frame_info])

# Render top to bottom: load button, status box, slider, navigation row, frame area.
display(control_row1)
display(browser_status)
display(control_row2)
display(control_row3)
display(browser_output)

# ============ Functions ============

def initialize_browser():
    """Load the h5 file selected in the S3 browser and enable frame navigation.

    Reads the selection, bucket and dataset name from the Cell 4 widgets, opens
    the file on S3 to read the number of frames (first axis of the dataset),
    then enables the navigation controls and shows the last frame.

    The module-level browser state (``current_file_key``, ``current_bucket``,
    ``current_dataset``, ``total_frames_count``) is updated only after the file
    has been validated, so a failed load leaves a previously loaded file usable.

    Returns:
        True when the file was loaded, False otherwise (the reason is shown in
        the browser status box).
    """
    global current_file_key, current_bucket, current_dataset, total_frames_count

    # Check if objects_cache exists
    if 'objects_cache' not in globals() or not objects_cache:
        set_browser_status("No objects loaded. Please run Cell 4 and list objects first.", error=True)
        return False

    # Get selected objects
    sel = list(objects_select.value)
    if not sel:
        set_browser_status("No object selected in Cell 4. Please select an h5 file.", error=True)
        return False

    # Only the first selected entry is considered.
    file_key = objects_cache[sel[0]]['Key']

    # Check if it's an h5 file
    if not file_key.endswith('.h5'):
        set_browser_status(f"Selected file is not h5: {file_key}", error=True)
        return False

    # Keep the candidates in locals until the file has been validated.
    bucket = buckets_dropdown.value
    dataset_name = dataset_text.value.strip()

    if not dataset_name:
        set_browser_status("Please specify dataset name in Cell 4 (e.g. narrowfov)", error=True)
        return False

    set_browser_status(f"Loading {file_key} metadata...")

    try:
        import s3fs
        import h5py
        import hdf5plugin  # imported for its side effects only

        # Open file and get frame count
        s3_fs = s3fs.S3FileSystem(
            key=creds['access_key'],
            secret=creds['secret_key'],
            client_kwargs={'endpoint_url': ECHO_ENDPOINT}
        )

        with s3_fs.open(f"{bucket}/{file_key}", 'rb') as f:
            with h5py.File(f, 'r') as hf:
                if dataset_name not in hf:
                    available = list(hf.keys())
                    set_browser_status(f"Dataset '{dataset_name}' not found. Available: {available}", error=True)
                    return False

                frame_count = hf[dataset_name].shape[0]

        if frame_count < 1:
            # A slider maximum of -1 would be invalid; report the problem instead.
            set_browser_status(f"Dataset '{dataset_name}' contains no frames", error=True)
            return False

        # Commit the state before touching the widgets: assigning to the slider
        # can fire its observer, which calls display_frame() and reads these.
        current_file_key = file_key
        current_bucket = bucket
        current_dataset = dataset_name
        total_frames_count = frame_count

        # Enable controls
        frame_slider.max = total_frames_count - 1
        frame_slider.value = total_frames_count - 1  # Start at last frame
        frame_slider.disabled = False

        frame_input.value = total_frames_count - 1
        frame_input.disabled = False

        first_btn.disabled = False
        prev_btn.disabled = False
        next_btn.disabled = False
        last_btn.disabled = False

        # Update info
        frame_info.value = f'<b>Loaded:</b> {file_key}<br><b>Dataset:</b> {current_dataset}<br><b>Total frames:</b> {total_frames_count}'

        set_browser_status(f"‚úÖ Loaded {file_key} ({total_frames_count} frames)")

        # Display initial frame
        display_frame(frame_slider.value)

        return True

    except Exception as e:
        set_browser_status(f"Failed to load file: {e}", error=True)
        import traceback
        with browser_status:
            traceback.print_exc()
        return False

def display_frame(frame_idx):
    """Fetch one frame of the loaded file from S3 and draw it.

    The frame is min-max scaled to 8 bits, histogram-equalised and drawn in a
    12x12 inch figure inside ``browser_output``. Errors are reported in the
    browser status box rather than raised.

    Args:
        frame_idx: Index of the frame to show; it must lie in
            ``[0, total_frames_count)``.
    """
    if current_file_key is None:
        set_browser_status("No file loaded", error=True)
        return

    if frame_idx < 0 or frame_idx >= total_frames_count:
        set_browser_status(f"Invalid frame index: {frame_idx}", error=True)
        return

    set_browser_status(f"Loading frame {frame_idx}...")
    browser_output.clear_output(wait=True)

    try:
        import numpy as np
        import s3fs
        import h5py
        import hdf5plugin  # imported for its side effects only

        s3_fs = s3fs.S3FileSystem(
            key=creds['access_key'],
            secret=creds['secret_key'],
            client_kwargs={'endpoint_url': ECHO_ENDPOINT}
        )

        with s3_fs.open(f"{current_bucket}/{current_file_key}", 'rb') as f:
            with h5py.File(f, 'r') as hf:
                frame = hf[current_dataset][frame_idx]

        # Min-max scale to 8 bits. A constant frame has zero range, which would
        # divide by zero and yield NaNs, so it is rendered as all zeros instead.
        lo = frame.min()
        hi = frame.max()
        if hi > lo:
            frame_8bit = ((frame - lo) / (hi - lo) * 255).astype('uint8')
        else:
            frame_8bit = np.zeros(frame.shape, dtype='uint8')
        frame_eq = exposure.equalize_hist(frame_8bit)

        # Display
        with browser_output:
            fig, ax = plt.subplots(figsize=(12, 12))
            ax.imshow(frame_eq, cmap='gray')
            ax.set_title(f'Frame {frame_idx} / {total_frames_count - 1}', fontsize=16)
            ax.axis('off')
            plt.tight_layout()
            plt.show()
            # Release the figure; browsing would otherwise accumulate one open
            # figure per displayed frame.
            plt.close(fig)

        set_browser_status(f"‚úÖ Displaying frame {frame_idx}")

    except Exception as e:
        set_browser_status(f"Failed to display frame: {e}", error=True)
        import traceback
        with browser_status:
            traceback.print_exc()

# ============ Event Handlers ============

def on_init_clicked(_):
    """Button handler: load the h5 file currently selected in the S3 browser."""
    initialize_browser()


# True while _go_to_frame() is updating the frame widgets. Assigning to one of
# them fires the other's observer; without this guard a single navigation step
# cascaded into up to three identical frame downloads from S3.
_frame_sync_in_progress = False


def _go_to_frame(frame_idx):
    """Point the slider and the number box at ``frame_idx`` and render it once."""
    global _frame_sync_in_progress
    if _frame_sync_in_progress:
        # Re-entered through an observer fired by the assignments below.
        return

    _frame_sync_in_progress = True
    try:
        frame_slider.value = frame_idx
        frame_input.value = frame_idx
    finally:
        _frame_sync_in_progress = False

    # display_frame() validates the index and reports out-of-range values.
    display_frame(frame_idx)


def on_slider_change(change):
    """Triggered when slider is released"""
    _go_to_frame(change['new'])


def on_input_change(change):
    """Triggered when frame number is entered"""
    _go_to_frame(change['new'])


def on_first_clicked(_):
    """Button handler: jump to frame 0."""
    _go_to_frame(0)


def on_prev_clicked(_):
    """Button handler: step back one frame, stopping at frame 0."""
    _go_to_frame(max(0, frame_slider.value - 1))


def on_next_clicked(_):
    """Button handler: step forward one frame, stopping at the last frame."""
    _go_to_frame(min(total_frames_count - 1, frame_slider.value + 1))


def on_last_clicked(_):
    """Button handler: jump to the last frame."""
    _go_to_frame(total_frames_count - 1)

# Bind events
init_btn.on_click(on_init_clicked)
# Both frame widgets observe 'value', so programmatic assignments to them
# invoke these handlers as well, not only user interaction.
frame_slider.observe(on_slider_change, names='value')
frame_input.observe(on_input_change, names='value')
first_btn.on_click(on_first_clicked)
prev_btn.on_click(on_prev_clicked)
next_btn.on_click(on_next_clicked)
last_btn.on_click(on_last_clicked)

set_browser_status("Click 'Load Selected H5 File' to start browsing")

=== H5 Frame Browser ===


HBox(children=(Button(button_style='primary', description='Load Selected H5 File', icon='download', style=Butt‚Ä¶

Output(layout=Layout(border_bottom='1px solid gray', border_left='1px solid gray', border_right='1px solid gra‚Ä¶

HBox(children=(IntSlider(value=0, continuous_update=False, description='Frame:', disabled=True, layout=Layout(‚Ä¶

HBox(children=(Button(description='First', disabled=True, icon='fast-backward', style=ButtonStyle()), Button(d‚Ä¶

Output()

## 6. Advanced Frame Processing Pipeline (MATLAB Style)

This cell implements a complete image processing pipeline for X-ray imaging data following MATLAB conventions, designed to preserve contrast information for quantitative analysis (e.g., liquid/solid phase ratio).

**Processing Pipeline (MATLAB Style - Fixed Steps):**

1. **Normalize to [0,1]:**
   - Converts uint16 [0-65535] to float32 [0-1] by dividing by 65535
   - Ensures numerical stability for subsequent operations
   
2. **Flat Field Correction:**
   - Computes flat field from 20 frames before start frame (uses mean)
   - Normalizes flat field to [0,1] 
   - Corrects each frame: `corrected = normalized / flat_normalized`
   
3. **Linear Stretch (1%/99% Percentiles):**
   - Calculates 1st and 99th percentiles of corrected image
   - Linear mapping: `stretched = (corrected - p1) / (p99 - p1)`
   - **Preserves histogram shape and contrast structure** (unlike histogram equalization)
   
4. **Gaussian Smoothing:**
   - Applies Gaussian filter to reduce noise
   - Adjustable σ parameter (default: 4.0)
   - All processing remains in float32 format


**UI Parameters:**
- Start Frame (min: 20, for flat field computation)
- End Frame
- Step (frame interval for batch processing)
- Gaussian σ (smoothing strength)
- **Save Format:** PNG (8-bit) / TIFF (32-bit float) / NPY (numpy array)
- Save Directory (where to save processed frames)

**Actions:**
- **Preview Last Frame:** Shows 5-panel visualization with histograms at each processing step
- **Batch Process & Save:** Processes all frames in range and saves in chosen format

**Preview Output (5 rows):**
- Row 1: Original frame (uint16) + histogram
- Row 2: Normalized [0,1] + histogram
- Row 3: After flat field correction + histogram  
- Row 4: After linear stretch (1%/99%) + histogram
- Row 5: Final result (after Gaussian filter) + histogram

**Save Format Options:**
- **PNG (8-bit):** Small files, good compatibility, loses precision
- **TIFF (32-bit float):** Preserves precision, larger files
- **NPY (numpy):** Full precision, Python native format

**Batch Processing:**
- Processes frames with progress bar
- Saves each frame: `frame_XXXXXX.png/tiff/npy`
- Reports success/failure count

**Requirements:**
- Must run Cell 4 first and select an .h5 file
- Start frame must be ≥ 20 (needed for flat field computation)



**Output:** Interactive processing UI with preview and batch capabilities

In [16]:
# Cell 6: Advanced Frame Processing - MATLAB Style Pipeline (Bug Fixed)
# Pipeline: Normalize -> Flat Field -> Percentile Stretch -> Gaussian Filter

import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
from skimage import exposure
import cv2
import numpy as np
import os
from pathlib import Path

# Status and display
process_status = widgets.Output(layout={'border': '1px solid gray', 'padding': '5px'})
process_output = widgets.Output()

def set_process_status(msg, error=False):
    """Replace the contents of the status box with one prefixed message.

    Args:
        msg: Text to display.
        error: When true the line is prefixed with the error icon,
            otherwise with the info icon.
    """
    icon = "❌ " if error else "ℹ️ "
    with process_status:
        # The box only ever shows the most recent message: the previous
        # content is cleared before the new line is printed.
        clear_output(wait=True)
        print(icon + msg)

# Shared state for this cell, all unset until a preview has been run.
#   flat_field       - cached flat-field image reused across previews/batches
#   process_*        - bucket / object key / dataset name of the selected file
#   current_file_id  - "bucket/key/dataset" string used to detect file changes
flat_field = None
process_file_key = process_bucket = process_dataset = None
current_file_id = None

print("=== Advanced Frame Processing (MATLAB Style) ===")

# Frame range parameters
# start_frame also anchors the flat field: compute_flat_field() averages the
# 20 frames immediately before it, which is why values below 20 are rejected.
start_frame = widgets.IntText(value=100, description='Start Frame:', layout=widgets.Layout(width='200px'))
end_frame = widgets.IntText(value=200, description='End Frame:', layout=widgets.Layout(width='200px'))
step_frame = widgets.IntText(value=1, description='Step:', layout=widgets.Layout(width='200px'))

# Gaussian filter parameter (sigma is passed to cv2.GaussianBlur as sigmaX)
gaussian_sigma = widgets.FloatText(value=4.0, description='Gaussian σ:', layout=widgets.Layout(width='200px'))

# Save format option; the option value is also used as the file extension
save_format = widgets.Dropdown(
    options=[('8-bit PNG', 'png'), ('32-bit Float TIFF', 'tiff'), ('NumPy Array (.npy)', 'npy')],
    value='png',
    description='Save Format:',
    layout=widgets.Layout(width='300px')
)

# Save directory
save_dir = widgets.Text(value='./processed_frames', description='Save Dir:', layout=widgets.Layout(width='400px'))

# Action buttons
preview_btn = widgets.Button(description='Preview Last Frame', button_style='info', icon='eye')
# Batch processing stays disabled until a preview has completed successfully.
batch_btn = widgets.Button(description='Batch Process & Save', button_style='warning', icon='cogs', disabled=True)
reset_btn = widgets.Button(description='Reset Flat Field', button_style='danger', icon='refresh', 
                           tooltip='Clear cached flat field (auto-clears when file changes)')

# Progress bar (max is reset to the number of frames at the start of each batch)
progress_bar = widgets.IntProgress(
    value=0,
    min=0,
    max=100,
    description='Progress:',
    bar_style='',
    layout=widgets.Layout(width='50%')
)

# Layout: one horizontal row per group of controls.
param_row1 = widgets.HBox(children=[start_frame, end_frame, step_frame])
param_row2 = widgets.HBox(children=[gaussian_sigma, save_format])
save_row = widgets.HBox(children=[save_dir])
action_row = widgets.HBox(children=[preview_btn, batch_btn, reset_btn])
progress_row = widgets.HBox(children=[progress_bar])

# Render the controls top to bottom, then the status box and the preview area.
display(
    param_row1,
    param_row2,
    save_row,
    action_row,
    progress_row,
    process_status,
    process_output,
)

# ============ Functions ============

def reset_flat_field():
    """Drop the cached flat field and forget which file it was computed for.

    With ``flat_field`` set to None the next preview recomputes it, and
    batch processing refuses to run until that has happened.
    """
    global flat_field, current_file_id
    flat_field = None
    current_file_id = None
    set_process_status("✅ Flat field cache cleared")

def load_file_info():
    """Copy the current Cell 4 selection into this cell's processing globals.

    Reads the selected object, bucket and dataset name from the Cell 4
    widgets (``objects_cache``, ``objects_select``, ``buckets_dropdown``,
    ``dataset_text``). If the resulting bucket/key/dataset triple differs
    from the one seen on the previous call, the cached flat field is
    discarded so it gets recomputed for the new file.

    Returns:
        bool: True when a usable HDF5 file and dataset name were found and
        the globals were updated; False after reporting the problem through
        ``set_process_status``.
    """
    global process_file_key, process_bucket, process_dataset, flat_field, current_file_id

    if 'objects_cache' not in globals() or not objects_cache:
        set_process_status("No objects loaded. Please run Cell 4 and list objects first.", error=True)
        return False

    selected = list(objects_select.value)
    if not selected:
        set_process_status("No object selected in Cell 4. Please select an h5 file.", error=True)
        return False

    # Only the first selected entry is used. The selection can outlive the
    # listing it refers to (e.g. after re-listing a smaller prefix), so a
    # failed lookup is reported instead of raised.
    try:
        obj = objects_cache[selected[0]]
    except (IndexError, KeyError):
        set_process_status("Selection is out of date. Please list objects again in Cell 4.", error=True)
        return False
    file_key = obj['Key']

    # Accept the common HDF5 extensions regardless of letter case.
    if not file_key.lower().endswith(('.h5', '.hdf5')):
        set_process_status(f"Selected file is not h5: {file_key}", error=True)
        return False

    bucket = buckets_dropdown.value
    dataset = dataset_text.value.strip()

    if not dataset:
        set_process_status("Please specify dataset name in Cell 4 (e.g. narrowfov)", error=True)
        return False

    # Identify the source by bucket + key + dataset; any change invalidates
    # the flat field, which was averaged from the previous source's frames.
    new_file_id = f"{bucket}/{file_key}/{dataset}"

    if current_file_id is not None and current_file_id != new_file_id:
        set_process_status("🔄 File changed detected. Clearing flat field cache...", error=False)
        flat_field = None

    # Update global state
    process_file_key = file_key
    process_bucket = bucket
    process_dataset = dataset
    current_file_id = new_file_id

    return True

def compute_flat_field(start_idx, n_frames=20):
    """Average the ``n_frames`` frames immediately before ``start_idx``.

    The frames are read from the file/dataset recorded in the
    ``process_bucket`` / ``process_file_key`` / ``process_dataset`` globals.

    Args:
        start_idx: First frame of the processing range; the flat field is
            built from frames ``start_idx - n_frames`` .. ``start_idx - 1``.
        n_frames: Number of frames to average (default 20).

    Returns:
        The per-pixel mean image as returned by ``np.mean(..., axis=0)``,
        or None when the request is invalid or reading fails (the reason is
        shown in the status box).
    """
    if n_frames < 1:
        set_process_status("Flat field needs at least 1 frame", error=True)
        return None

    if start_idx < n_frames:
        set_process_status(f"Start frame must be >= {n_frames} (need {n_frames} frames for flat field)", error=True)
        return None

    set_process_status(f"Computing flat field from frames {start_idx-n_frames} to {start_idx-1}...")

    try:
        import s3fs
        import h5py
        import hdf5plugin  # not referenced below; presumably imported to register HDF5 compression filters

        s3_fs = s3fs.S3FileSystem(
            key=creds['access_key'],
            secret=creds['secret_key'],
            client_kwargs={'endpoint_url': ECHO_ENDPOINT}
        )

        with s3_fs.open(f"{process_bucket}/{process_file_key}", 'rb') as f:
            with h5py.File(f, 'r') as hf:
                dataset = hf[process_dataset]
                frames = dataset[start_idx-n_frames:start_idx]

        # A start index beyond the end of the dataset yields a short (or
        # empty) slice; averaging that would give a misleading or NaN image.
        if len(frames) < n_frames:
            set_process_status(
                f"Only {len(frames)} frame(s) available before frame {start_idx}; "
                f"need {n_frames} for flat field",
                error=True,
            )
            return None

        flat = np.mean(frames, axis=0)

        set_process_status(f"✅ Flat field computed from {n_frames} frames")
        return flat

    except Exception as e:
        set_process_status(f"Failed to compute flat field: {e}", error=True)
        import traceback
        with process_status:
            traceback.print_exc()
        return None

def process_single_frame(frame_data, flat_field, gaussian_sigma,
                         max_value=65535.0, percentiles=(1, 99)):
    """Run the MATLAB-style pipeline on one frame and return every stage.

    Steps:
      1. Keep a copy of the raw frame.
      2. Normalize frame and flat field by dividing by ``max_value``.
      3. Flat field correction: normalized / flat_normalized.
      4. Linear stretch between the lower/upper percentile, clipped to [0, 1].
      5. Gaussian filter with ``gaussian_sigma``.

    Args:
        frame_data: Raw frame as a NumPy array (uint16 in this notebook).
        flat_field: Flat-field image with a shape compatible with
            ``frame_data``.
        gaussian_sigma: Sigma for ``cv2.GaussianBlur``. A value <= 0 means
            "no smoothing" (the stretched image is returned unchanged)
            instead of raising inside OpenCV.
        max_value: Divisor used for normalization (default 65535 for
            16-bit data).
        percentiles: ``(low, high)`` percentiles that map to 0 and 1 in the
            stretch step (default 1 % / 99 %).

    Returns:
        dict with keys ``original`` (copy of the input), ``normalized``,
        ``corrected``, ``stretched``, ``filtered`` (all float32) and the two
        percentile values ``p1`` / ``p99``.
    """
    epsilon = 1e-10  # guards both divisions against a zero denominator
    max_value = float(max_value)

    # Step 1: Original (dtype untouched)
    original = frame_data.copy()

    # Step 2: Normalize to [0,1]
    normalized = frame_data.astype(np.float32) / max_value
    flat_normalized = np.asarray(flat_field).astype(np.float32) / max_value

    # Step 3: Flat field correction
    corrected = normalized / (flat_normalized + epsilon)

    # Step 4: Linear stretch between the two percentiles.
    # The percentile scalars may come back as float64 depending on the NumPy
    # version; plain Python floats keep the arithmetic in float32.
    p1, p99 = np.percentile(corrected, list(percentiles))
    low, high = float(p1), float(p99)
    stretched = np.clip((corrected - low) / (high - low + epsilon), 0, 1)

    # Step 5: Gaussian filter. Kernel size (0, 0) lets OpenCV derive it from
    # sigma, which requires sigma > 0.
    if gaussian_sigma > 0:
        filtered = cv2.GaussianBlur(stretched, (0, 0), sigmaX=gaussian_sigma)
    else:
        filtered = stretched.copy()

    return {
        'original': original,           # same dtype as the input
        'normalized': normalized,       # float32 [0,1]
        'corrected': corrected,         # float32
        'stretched': stretched,         # float32 [0,1]
        'filtered': filtered,           # float32 [0,1]
        'p1': p1,
        'p99': p99
    }

def _render_preview_figure(results, end_idx, sigma):
    """Plot each pipeline stage as an image + histogram pair (5 rows x 2 columns)."""

    def float_to_display(img_float):
        """Convert float [0,1] to uint8 for display"""
        return (np.clip(img_float, 0, 1) * 255).astype(np.uint8)

    original = results['original']
    normalized = results['normalized']
    corrected = results['corrected']
    filtered = results['filtered']

    # The corrected image is not bounded to [0,1]; rescale it by its own
    # min/max so the picture is visible. The histogram still uses raw values.
    corrected_unit = (corrected - corrected.min()) / (corrected.max() - corrected.min() + 1e-10)

    # (results key for the histogram, uint8 image to show, image title, histogram title)
    stages = [
        ('original',
         float_to_display(original.astype(np.float32) / 65535.0),
         f'1. Original Frame {end_idx} (uint16)\nRange: [{original.min()}, {original.max()}]',
         'Histogram - Original (uint16)'),
        ('normalized',
         float_to_display(normalized),
         f'2. Normalized (÷65535)\nRange: [{normalized.min():.4f}, {normalized.max():.4f}]',
         'Histogram - Normalized'),
        ('corrected',
         float_to_display(corrected_unit),
         f'3. After Flat Field Correction\nRange: [{corrected.min():.4f}, {corrected.max():.4f}]',
         'Histogram - After Flat Field'),
        ('stretched',
         float_to_display(results['stretched']),
         f'4. After Linear Stretch (1%/99% Percentiles)\nP1={results["p1"]:.4f}, P99={results["p99"]:.4f}',
         'Histogram - After Stretch'),
        ('filtered',
         float_to_display(filtered),
         f'5. Final Result (Gaussian Filter σ={sigma})\nRange: [{filtered.min():.4f}, {filtered.max():.4f}]',
         'Histogram - Final Result'),
    ]

    fig, axes = plt.subplots(len(stages), 2, figsize=(16, 25))
    for (img_ax, hist_ax), (key, shown, title, hist_title) in zip(axes, stages):
        img_ax.imshow(shown, cmap='gray', vmin=0, vmax=255)
        img_ax.set_title(title, fontsize=12)
        img_ax.axis('off')

        hist_ax.hist(results[key].ravel(), bins=256, color='black', alpha=0.7)
        hist_ax.set_title(hist_title, fontsize=12)
        hist_ax.set_xlabel('Pixel Value')
        hist_ax.set_ylabel('Frequency')

    plt.tight_layout()
    plt.show()


def preview_last_frame():
    """Process the End Frame and show every pipeline stage with its histogram.

    Refreshes the file selection via ``load_file_info()``, validates the
    frame range, computes the flat field (or reuses the cached one), loads
    frame ``end_frame.value`` from S3, runs ``process_single_frame`` and
    draws the result into ``process_output``. On success the batch button
    is enabled. Problems are reported in the status box; nothing is raised.
    """
    global flat_field

    if not load_file_info():
        return

    start_idx = start_frame.value
    end_idx = end_frame.value

    if start_idx >= end_idx:
        set_process_status("Start frame must be < End frame", error=True)
        return

    if start_idx < 20:
        set_process_status("Start frame must be >= 20 (for flat field computation)", error=True)
        return

    set_process_status(f"Loading last frame {end_idx}...")

    try:
        import s3fs
        import h5py
        import hdf5plugin  # not referenced below; presumably imported to register HDF5 compression filters

        # The flat field is built from the frames just before Start Frame, so
        # a cached one is only valid for the start index it was computed
        # with. That index is remembered on the function object; it is only
        # consulted while flat_field is set, so external resets need no
        # extra bookkeeping.
        cached_start = getattr(preview_last_frame, '_flat_start_idx', None)
        if flat_field is not None and cached_start not in (None, start_idx):
            flat_field = None

        # Compute flat field if needed
        if flat_field is None:
            flat_field = compute_flat_field(start_idx)
            if flat_field is None:
                return
            preview_last_frame._flat_start_idx = start_idx
        else:
            set_process_status("ℹ️ Using cached flat field (from previous run)")

        # Load last frame
        s3_fs = s3fs.S3FileSystem(
            key=creds['access_key'],
            secret=creds['secret_key'],
            client_kwargs={'endpoint_url': ECHO_ENDPOINT}
        )

        with s3_fs.open(f"{process_bucket}/{process_file_key}", 'rb') as f:
            with h5py.File(f, 'r') as hf:
                dataset = hf[process_dataset]
                n_available = dataset.shape[0]
                # Report an out-of-range End Frame in plain words rather
                # than as an indexing traceback.
                if end_idx >= n_available:
                    set_process_status(
                        f"End frame {end_idx} is out of range (dataset has {n_available} frames)",
                        error=True,
                    )
                    return
                frame_raw = dataset[end_idx]

        # Get processing parameters
        sigma = gaussian_sigma.value

        # Process frame
        set_process_status(f"Processing frame {end_idx}...")
        results = process_single_frame(frame_raw, flat_field, sigma)

        # Display results - 5 rows x 2 columns
        with process_output:
            clear_output(wait=True)
            _render_preview_figure(results, end_idx, sigma)

        set_process_status(f"✅ Preview complete. File: {process_file_key}, Dataset: {process_dataset}")
        batch_btn.disabled = False

    except Exception as e:
        set_process_status(f"Preview failed: {e}", error=True)
        import traceback
        with process_status:
            traceback.print_exc()

def _save_processed_frame(image, output_dir, frame_idx, fmt):
    """Write one processed frame as ``frame_XXXXXX.<fmt>`` and return its path.

    Raises:
        ValueError: ``fmt`` is not one of 'png', 'tiff', 'npy'.
        OSError: OpenCV reported that the file could not be written.
    """
    if fmt not in ('png', 'tiff', 'npy'):
        raise ValueError(f"Unsupported save format: {fmt}")

    filename = output_dir / f"frame_{frame_idx:06d}.{fmt}"

    if fmt == 'npy':
        np.save(str(filename), image)
        return filename

    if fmt == 'png':
        # Convert to 8-bit; clip first, matching the preview's display
        # conversion, so values outside [0,1] cannot wrap around.
        payload = (np.clip(image, 0, 1) * 255).astype(np.uint8)
    else:
        # Save as 32-bit float TIFF
        payload = image.astype(np.float32)

    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(str(filename), payload):
        raise OSError(f"cv2.imwrite could not write {filename}")
    return filename


def batch_process_and_save():
    """Process frames Start..End (inclusive, every ``Step``) and save them.

    Uses the flat field and file information left behind by the last
    successful preview. Each frame is run through ``process_single_frame``
    and its ``filtered`` result is written to the save directory in the
    selected format. Progress and the final saved/failed counts are shown
    in the status box and progress bar; nothing is raised to the caller.
    """

    if flat_field is None:
        set_process_status("Please preview first to compute flat field", error=True)
        return

    start_idx = start_frame.value
    end_idx = end_frame.value
    step = step_frame.value

    if step < 1:
        set_process_status("Step must be >= 1", error=True)
        return

    # An inverted range would otherwise yield zero frames and be reported as
    # a successful run.
    if start_idx > end_idx:
        set_process_status("Start frame must be <= End frame", error=True)
        return

    # Check save directory
    save_path = save_dir.value.strip()
    if not save_path:
        set_process_status("Please specify save directory", error=True)
        return
    output_dir = Path(save_path).expanduser()

    # Create output directory
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        set_process_status(f"Cannot create save directory {output_dir}: {e}", error=True)
        return

    # Get parameters
    sigma = gaussian_sigma.value
    fmt = save_format.value

    # Calculate frame indices (End Frame is included)
    frame_indices = list(range(start_idx, end_idx + 1, step))
    total_frames = len(frame_indices)

    set_process_status(f"Processing and saving {total_frames} frames to {output_dir}...")

    # Setup progress bar
    progress_bar.max = total_frames
    progress_bar.value = 0

    saved_count = 0
    failed_count = 0
    last_error = None  # kept for the summary; per-frame messages get overwritten

    try:
        import s3fs
        import h5py
        import hdf5plugin  # not referenced below; presumably imported to register HDF5 compression filters

        s3_fs = s3fs.S3FileSystem(
            key=creds['access_key'],
            secret=creds['secret_key'],
            client_kwargs={'endpoint_url': ECHO_ENDPOINT}
        )

        with s3_fs.open(f"{process_bucket}/{process_file_key}", 'rb') as f:
            with h5py.File(f, 'r') as hf:
                dataset = hf[process_dataset]

                for i, frame_idx in enumerate(frame_indices):
                    # One bad frame must not abort the batch: count it and
                    # carry on with the next one.
                    try:
                        frame_raw = dataset[frame_idx]
                        results = process_single_frame(frame_raw, flat_field, sigma)
                        _save_processed_frame(results['filtered'], output_dir, frame_idx, fmt)
                        saved_count += 1

                    except Exception as e:
                        failed_count += 1
                        last_error = f"frame {frame_idx}: {e}"
                        set_process_status(f"Failed to process frame {frame_idx}: {e}", error=True)

                    progress_bar.value = i + 1

                    if (i + 1) % 10 == 0 or (i + 1) == total_frames:
                        set_process_status(f"Processed {i + 1}/{total_frames} frames... (saved: {saved_count}, failed: {failed_count})")

        if failed_count == 0:
            set_process_status(f"✅ All {saved_count} frames processed and saved to {output_dir} ({fmt} format)")
        else:
            set_process_status(
                f"⚠️ Completed with {failed_count} failures. {saved_count} frames saved to {output_dir}. "
                f"Last error: {last_error}",
                error=True,
            )

    except Exception as e:
        set_process_status(f"Batch processing failed: {e}", error=True)
        import traceback
        with process_status:
            traceback.print_exc()

# ============ Event Handlers ============

def on_preview_clicked(_):
    """Click callback for the preview button; the button argument is unused."""
    preview_last_frame()


def on_batch_clicked(_):
    """Click callback for the batch button; the button argument is unused."""
    batch_process_and_save()


def on_reset_clicked(_):
    """Click callback for the reset button; the button argument is unused."""
    reset_flat_field()


# Wire each button to its callback.
preview_btn.on_click(on_preview_clicked)
batch_btn.on_click(on_batch_clicked)
reset_btn.on_click(on_reset_clicked)

# Initial hint shown in the status box.
set_process_status("Set parameters and click 'Preview Last Frame' to start")

=== Advanced Frame Processing (MATLAB Style) ===


HBox(children=(IntText(value=100, description='Start Frame:', layout=Layout(width='200px')), IntText(value=200…

HBox(children=(FloatText(value=4.0, description='Gaussian σ:', layout=Layout(width='200px')), Dropdown(descrip…

HBox(children=(Text(value='./processed_frames', description='Save Dir:', layout=Layout(width='400px')),))

HBox(children=(Button(button_style='info', description='Preview Last Frame', icon='eye', style=ButtonStyle()),…

HBox(children=(IntProgress(value=0, description='Progress:', layout=Layout(width='50%')),))

Output(layout=Layout(border_bottom='1px solid gray', border_left='1px solid gray', border_right='1px solid gra…

Output()