# KLB to TIFF Converter

Converts KLB files to uncompressed TIFF format, copies the accompanying
metadata files (XML/MAT), and measures conversion/transfer times and file sizes.

**Author:** Miller Brain Observatory  
**Date:** 2025-10-31

This notebook replicates the functionality of the MATLAB `io_test.m` script.

## Setup and Imports

In [1]:
import shutil
import time
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
import pyklb
import tifffile

# print the installed pyklb version
print(f"pyklb version: {pyklb.__version__}")

pyklb version: 0.3.0


## Configuration

Set your source and destination directories here.

In [6]:
# input (KLB) and output (TIFF) root directories -- edit for your dataset
source_root = Path(r"\\rbo-s1\S1_DATA\isoview\foconnell\io_corrected_test\klb")
dest_root = Path(r"\\rbo-s1\S1_DATA\isoview\foconnell\io_corrected_test\tiff")

# timepoint folder names to convert; set to None to process every TM* directory
timepoints_to_process = ['TM000000', 'TM000001']

# echo the active configuration
selection_label = timepoints_to_process or 'all timepoints'
for label, value in (
    ("Source", source_root),
    ("Destination", dest_root),
    ("Processing", selection_label),
):
    print(f"{label}: {value}")

Source: \\rbo-s1\S1_DATA\isoview\foconnell\io_corrected_test\klb
Destination: \\rbo-s1\S1_DATA\isoview\foconnell\io_corrected_test\tiff
Processing: ['TM000000', 'TM000001']


## Helper Functions

In [7]:
def convert_klb_to_tiff(klb_path: Path, tiff_path: Path) -> Tuple[float, float, float]:
    """Convert a single KLB file to uncompressed TIFF.

    The volume is read with ``pyklb.readfull`` and written with
    ``tifffile.imwrite`` using ``compression=None``. The reported time covers
    the read and the write together; the file-size lookups are not timed.

    Parameters
    ----------
    klb_path : Path
        Path to source KLB file.
    tiff_path : Path
        Path to destination TIFF file.

    Returns
    -------
    klb_size_mb : float
        Size of KLB file in megabytes (bytes / 1024**2).
    tiff_size_mb : float
        Size of TIFF file in megabytes (bytes / 1024**2).
    elapsed_time : float
        Conversion time in seconds.
    """
    # perf_counter is monotonic and high resolution, so the measured duration
    # is not affected by system clock adjustments or a coarse wall clock
    start_time = time.perf_counter()

    # read klb file
    volume = pyklb.readfull(str(klb_path))

    # write uncompressed tiff
    tifffile.imwrite(
        tiff_path,
        volume,
        compression=None,
        photometric='minisblack',
        metadata={'software': 'pyklb-converter'}
    )

    elapsed_time = time.perf_counter() - start_time

    # get file sizes
    klb_size_mb = klb_path.stat().st_size / (1024 ** 2)
    tiff_size_mb = tiff_path.stat().st_size / (1024 ** 2)

    return klb_size_mb, tiff_size_mb, elapsed_time


def copy_metadata_files(source_dir: Path, dest_dir: Path,
                        extensions: List[str]) -> int:
    """Copy metadata files from source to destination.

    Only the top level of ``source_dir`` is searched. File contents are
    streamed with ``shutil.copyfile`` rather than loaded into memory, and
    entries that are not regular files are skipped.

    Parameters
    ----------
    source_dir : Path
        Source directory.
    dest_dir : Path
        Destination directory. It must already exist.
    extensions : list of str
        File extensions to copy (e.g., ['.xml', '.mat']).

    Returns
    -------
    int
        Number of files copied.
    """
    files_copied = 0

    for ext in extensions:
        for file_path in source_dir.glob(f'*{ext}'):
            # a directory whose name matches the pattern cannot be copied
            # as a file, so leave it out instead of failing the whole copy
            if not file_path.is_file():
                continue

            try:
                shutil.copyfile(file_path, dest_dir / file_path.name)
            except shutil.SameFileError:
                # source and destination are the same file; the content is
                # already in place, so there is nothing to transfer
                pass
            files_copied += 1

    return files_copied


def process_timepoint(source_dir: Path, dest_dir: Path,
                     timepoint_name: str) -> Dict:
    """Process a single timepoint directory.

    Converts every ``*.klb`` file found directly in ``source_dir`` to a
    ``.tif`` file of the same base name in ``dest_dir``, then copies the
    ``.xml`` and ``.mat`` files. A file that fails to convert is reported and
    skipped; the remaining files are still processed.

    Parameters
    ----------
    source_dir : Path
        Source timepoint directory.
    dest_dir : Path
        Destination timepoint directory (created if missing).
    timepoint_name : str
        Name of the timepoint (e.g., 'TM000000').

    Returns
    -------
    dict
        Statistics dictionary with keys ``timepoint``, ``num_files``
        (KLB files converted successfully), ``num_failed`` (KLB files whose
        conversion raised), ``klb_mb``, ``tiff_mb``, ``conversion_time_s``,
        ``copy_time_s`` and ``total_time_s``.
    """
    print(f'=== Processing {timepoint_name} ===')

    # create destination directory
    dest_dir.mkdir(parents=True, exist_ok=True)

    # find files; glob order is not guaranteed, so sort the KLB files to get
    # a reproducible processing order
    klb_files = sorted(source_dir.glob('*.klb'))
    xml_files = list(source_dir.glob('*.xml'))
    mat_files = list(source_dir.glob('*.mat'))

    print(f'  Found: {len(klb_files)} KLB, {len(xml_files)} XML, '
          f'{len(mat_files)} MAT files')

    # convert klb files
    conversion_start = time.perf_counter()
    klb_total_size = 0.0
    tiff_total_size = 0.0
    num_converted = 0

    for idx, klb_path in enumerate(klb_files, 1):
        # append '.tif' to the stem; calling with_suffix() on the stem would
        # drop everything after the last dot of names such as 'a.b.klb'
        tiff_path = dest_dir / f'{klb_path.stem}.tif'

        print(f'    [{idx}/{len(klb_files)}] Converting {klb_path.name}... ',
              end='', flush=True)

        try:
            klb_size, tiff_size, file_time = convert_klb_to_tiff(
                klb_path, tiff_path
            )
        except Exception as e:
            # best effort: report the failure and keep going with the rest
            print(f'ERROR: {e}')
            continue

        num_converted += 1
        klb_total_size += klb_size
        tiff_total_size += tiff_size

        throughput = tiff_size / file_time if file_time > 0 else 0
        print(f'{klb_size:.2f} MB -> {tiff_size:.2f} MB '
              f'({file_time:.2f} s, {throughput:.2f} MB/s)')

    conversion_time = time.perf_counter() - conversion_start

    # copy metadata files
    copy_start = time.perf_counter()
    xml_copied = copy_metadata_files(source_dir, dest_dir, ['.xml'])
    mat_copied = copy_metadata_files(source_dir, dest_dir, ['.mat'])
    copy_time = time.perf_counter() - copy_start

    print(f'  Copied {xml_copied} XML files')
    print(f'  Copied {mat_copied} MAT files')

    total_time = conversion_time + copy_time
    # ratio is TIFF size over KLB size, i.e. how much the data grows
    compression_ratio = (tiff_total_size / klb_total_size
                        if klb_total_size > 0 else 0)

    print(f'  Timepoint total: {total_time:.2f} s '
          f'(conversion: {conversion_time:.2f} s, copy: {copy_time:.2f} s)')
    print(f'  Compression ratio: {compression_ratio:.2f}x '
          f'(KLB: {klb_total_size:.2f} MB, TIFF: {tiff_total_size:.2f} MB)\n')

    return {
        'timepoint': timepoint_name,
        # count only successful conversions so the totals match the sizes
        'num_files': num_converted,
        'num_failed': len(klb_files) - num_converted,
        'klb_mb': klb_total_size,
        'tiff_mb': tiff_total_size,
        'conversion_time_s': conversion_time,
        'copy_time_s': copy_time,
        'total_time_s': total_time
    }


def print_summary(stats_list: List[Dict], total_time: float) -> pd.DataFrame:
    """Print summary statistics.

    Parameters
    ----------
    stats_list : list of dict
        List of statistics dictionaries from each timepoint. Each entry needs
        the keys ``num_files``, ``klb_mb``, ``tiff_mb``, ``conversion_time_s``,
        ``copy_time_s`` and ``total_time_s``.
    total_time : float
        Total processing time in seconds.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``stats_list``. Empty when ``stats_list`` is
        empty, in which case only a short notice is printed.
    """
    df = pd.DataFrame(stats_list)

    if df.empty:
        # an empty frame has no columns to aggregate; indexing it would raise
        print('No statistics to summarize.')
        return df

    total_files = df['num_files'].sum()
    total_klb_mb = df['klb_mb'].sum()
    total_tiff_mb = df['tiff_mb'].sum()
    total_conversion = df['conversion_time_s'].sum()

    print('\n' + '=' * 40)
    print('CONVERSION SUMMARY')
    print('=' * 40)
    print(f'Total timepoints processed: {len(stats_list)}')
    print(f'Total KLB files converted: {total_files}')
    print(f'Total time: {total_time:.2f} s ({total_time/60:.2f} min)\n')

    print(f'Total KLB size:  {total_klb_mb:.2f} MB '
          f'({total_klb_mb/1024:.2f} GB)')
    print(f'Total TIFF size: {total_tiff_mb:.2f} MB '
          f'({total_tiff_mb/1024:.2f} GB)')

    # ratio is TIFF size over KLB size; 0 is reported when nothing was converted
    overall_ratio = (total_tiff_mb / total_klb_mb
                    if total_klb_mb > 0 else 0)
    print(f'Overall compression ratio: {overall_ratio:.2f}x\n')

    print(f'Average conversion time per timepoint: '
          f'{df["conversion_time_s"].mean():.2f} s')
    print(f'Average copy time per timepoint: '
          f'{df["copy_time_s"].mean():.2f} s')
    print(f'Average total time per timepoint: '
          f'{df["total_time_s"].mean():.2f} s\n')

    # throughput is total TIFF megabytes over total conversion seconds
    avg_throughput = (total_tiff_mb / total_conversion
                     if total_conversion > 0 else 0)
    print(f'Average throughput: {avg_throughput:.2f} MB/s')
    print('=' * 40)

    return df

## Find Timepoint Directories

In [8]:
# validate source directory
if not source_root.exists():
    raise FileNotFoundError(f"Source directory does not exist: {source_root}")

# find timepoint directories
if timepoints_to_process:
    timepoint_dirs = []
    for tp in timepoints_to_process:
        tp_dir = source_root / tp
        # require an actual directory, the same check auto-discovery applies,
        # so a stray file with a timepoint name is not treated as a timepoint
        if tp_dir.is_dir():
            timepoint_dirs.append(tp_dir)
        else:
            print(f'Warning: Timepoint directory not found: {tp_dir}')
else:
    # auto-discover every TM* directory, in name order
    timepoint_dirs = sorted(d for d in source_root.glob('TM*') if d.is_dir())

if not timepoint_dirs:
    raise FileNotFoundError(f"No timepoint directories found in: {source_root}")

print(f'Found {len(timepoint_dirs)} timepoint directories:')
for d in timepoint_dirs:
    print(f'  - {d.name}')

Found 2 timepoint directories:
  - TM000000
  - TM000001


## Process All Timepoints

This cell converts the KLB files in each selected timepoint directory to TIFF and copies the accompanying XML/MAT metadata files.

In [None]:
# process each timepoint; perf_counter is monotonic, so the total duration is
# not distorted by system clock adjustments during a long run
total_start = time.perf_counter()
stats_list = []

for idx, tp_dir in enumerate(timepoint_dirs, 1):
    tp_name = tp_dir.name
    dest_tp_dir = dest_root / tp_name

    # progress prefix; process_timepoint prints the rest of the line
    print(f'[{idx}/{len(timepoint_dirs)}] ', end='')

    try:
        stats = process_timepoint(tp_dir, dest_tp_dir, tp_name)
    except Exception as e:
        # best effort: report the failure and continue with the next timepoint
        print(f'Error processing {tp_name}: {e}\n')
    else:
        stats_list.append(stats)

total_time = time.perf_counter() - total_start

[1/2] === Processing TM000000 ===
  Found: 4 KLB, 2 XML, 5 MAT files
    [1/4] Converting SPM00_TM000000_CM00_CHN01.klb... 72.21 MB -> 232.08 MB (0.45 s, 518.90 MB/s)
    [2/4] Converting SPM00_TM000000_CM01_CHN01.klb... 71.29 MB -> 232.08 MB (0.54 s, 431.82 MB/s)
    [3/4] Converting SPM00_TM000000_CM02_CHN00.klb... 66.68 MB -> 232.08 MB (0.47 s, 488.73 MB/s)
    [4/4] Converting SPM00_TM000000_CM03_CHN00.klb... 68.03 MB -> 232.08 MB (0.42 s, 552.40 MB/s)
  Copied 2 XML files
  Copied 5 MAT files
  Timepoint total: 2.09 s (conversion: 1.95 s, copy: 0.14 s)
  Compression ratio: 3.34x (KLB: 278.21 MB, TIFF: 928.30 MB)

[2/2] === Processing TM000001 ===
  Found: 4 KLB, 2 XML, 5 MAT files
    [1/4] Converting SPM00_TM000001_CM00_CHN01.klb... 73.13 MB -> 232.08 MB (0.90 s, 257.25 MB/s)
    [2/4] Converting SPM00_TM000001_CM01_CHN01.klb... 72.07 MB -> 232.08 MB (0.49 s, 471.81 MB/s)
    [3/4] Converting SPM00_TM000001_CM02_CHN00.klb... 67.08 MB -> 232.08 MB (0.45 s, 510.23 MB/s)
    [4/4] C

## Summary Statistics

In [None]:
# summarise the run, or say so when no timepoint produced statistics
if not stats_list:
    print("No statistics to display.")
else:
    df = print_summary(stats_list, total_time)

    # render the per-timepoint table below the printed summary
    print("\nDetailed statistics:")
    display(df)

## Save Statistics to CSV

In [None]:
if stats_list:
    # build the table from stats_list directly so this cell also works when
    # the summary cell (which defines `df`) has not been run
    stats_df = pd.DataFrame(stats_list)

    csv_path = dest_root / 'conversion_statistics.csv'
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    stats_df.to_csv(csv_path, index=False)
    print(f'✓ Statistics saved to: {csv_path}')
    print(f'  File size: {csv_path.stat().st_size} bytes')
else:
    print("No statistics to save.")

## Visualize Results (Optional)

Create some plots to visualize the conversion statistics.

In [None]:
import matplotlib.pyplot as plt


def _decorate_timepoint_axis(ax, positions, labels, ylabel, title):
    """Apply the labels, ticks, legend and grid shared by all four panels."""
    ax.set_xlabel('Timepoint')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation=45, ha='right')
    ax.legend()
    ax.grid(axis='y', alpha=0.3)


if stats_list:
    # build the table from stats_list directly so this cell also works when
    # the summary cell (which defines `df`) has not been run
    stats_df = pd.DataFrame(stats_list)

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    x = list(range(len(stats_df)))
    width = 0.35
    timepoint_labels = stats_df['timepoint']

    # file sizes comparison (side-by-side bars per timepoint)
    ax = axes[0, 0]
    ax.bar([i - width/2 for i in x], stats_df['klb_mb'], width,
           label='KLB', alpha=0.8)
    ax.bar([i + width/2 for i in x], stats_df['tiff_mb'], width,
           label='TIFF', alpha=0.8)
    _decorate_timepoint_axis(ax, x, timepoint_labels,
                             'Size (MB)', 'File Sizes: KLB vs TIFF')

    # compression ratio (TIFF size over KLB size)
    ax = axes[0, 1]
    compression_ratios = stats_df['tiff_mb'] / stats_df['klb_mb']
    ax.bar(x, compression_ratios, alpha=0.8, color='green')
    ax.axhline(compression_ratios.mean(), color='red', linestyle='--',
               label=f'Mean: {compression_ratios.mean():.2f}x')
    _decorate_timepoint_axis(ax, x, timepoint_labels,
                             'Compression Ratio (TIFF/KLB)',
                             'Compression Efficiency')

    # processing times (copy time stacked on top of conversion time)
    ax = axes[1, 0]
    ax.bar(x, stats_df['conversion_time_s'], width,
           label='Conversion', alpha=0.8)
    ax.bar(x, stats_df['copy_time_s'], width,
           bottom=stats_df['conversion_time_s'], label='Copy', alpha=0.8)
    _decorate_timepoint_axis(ax, x, timepoint_labels,
                             'Time (seconds)', 'Processing Time Breakdown')

    # throughput (TIFF megabytes per second of conversion time)
    ax = axes[1, 1]
    throughput = stats_df['tiff_mb'] / stats_df['conversion_time_s']
    ax.bar(x, throughput, alpha=0.8, color='orange')
    ax.axhline(throughput.mean(), color='red', linestyle='--',
               label=f'Mean: {throughput.mean():.2f} MB/s')
    _decorate_timepoint_axis(ax, x, timepoint_labels,
                             'Throughput (MB/s)', 'Conversion Throughput')

    plt.tight_layout()

    # save figure before showing it, so the file on disk does not depend on
    # the figure still being usable after plt.show() returns
    fig_path = dest_root / 'conversion_statistics.png'
    fig.savefig(fig_path, dpi=150, bbox_inches='tight')

    plt.show()
    print(f'\n✓ Figure saved to: {fig_path}')
else:
    print("No data to visualize.")