## VIPS Benchmarking for 3DHistech Tiff Files

Command structure: 

```
vips tiffsave input_file output_file.tif \
    --compression COMPRESSION \
    --Q QUALITY \
    --pyramid \
    --tile \
    --tile-width TILE_WIDTH \
    --tile-height TILE_HEIGHT
```

Selection of important options:
```bash
Operation
--compression=VipsForeignTiffCompression     Compression for this file (Options are: none, jpeg, deflate, packbits, ccittfax4, lzw, webp, zstd, jp2k)
-Q, --Q=gint                                 Q factor (Value between 0 and 100)
--tile-width=gint                            Tile width in pixels
--tile-height=gint                           Tile height in pixels
--pyramid                                    Write a pyramidal tiff
--properties                                 Write a properties document to IMAGEDESCRIPTION
-d, --depth=VipsForeignDzDepth               Pyramid depth

VIPS Application Options
--vips-concurrency=N                         evaluate with N concurrent threads
--vips-progress                              show progress feedback
--vips-cache-max=N                           cache at most N operations
--vips-cache-max-memory=N                    cache at most N bytes in memory
--vips-cache-max-files=N                     allow at most N open files
```

## Test VIPS for 3DHistech Setup

In [7]:
# imports
from pathlib import Path
import time

from openslide import OpenSlide
import pandas as pd

from utils.utils import get_file_size_in_mb, slide_info, remove_file

# Slide to convert (relative to the notebook's working directory).
# Alternative input:
# input_file = "./input_files/3DHistech/Testscans-Fabian/E445_24-1A.1_Big_Jpeg_enhanced/E445_24-1A.1_Wholeslide_EnhancedColors_Extended.tif"
input_file = "./input_files/3DHistech/Testscans-Fabian/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended.tif"
input_file_path_obj = Path(input_file)

# Converted files go below the VIPS output root, in a sub-folder named after
# the directory that contains the input file.
output_folder = Path("./output_files/VIPS")
output_file_dir = output_folder.joinpath(input_file_path_obj.parent.name)
output_file_dir.mkdir(parents=True, exist_ok=True)

In [51]:
# Parameter grid under consideration:
#   tile size:   256, 512, 1024
#   compression: none, jpeg, jp2k
#   quality:     100, 95, 90, 80
# Settings used for this single run:
tile_size = 256
quality = 98
compression = "jpeg"

# The settings are encoded in the file name so that runs do not overwrite each other.
output_file = output_file_dir.joinpath(
    f"{input_file_path_obj.stem}_vips_{tile_size}_{quality}_{compression}.tif"
).resolve()

print(output_file)

/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended_vips_256_98_jpeg.tif


In [52]:
%%time
start_time = time.time()

!vips tiffsave $input_file $output_file \
    --pyramid \
    --tile \
    --tile-width $tile_size \
    --tile-height $tile_size \
    --compression $compression \
    -Q $quality \
    --vips-progress \
    --vips-concurrency=8 \
    --vips-cache-max-memory=8000000000

end_time = time.time()
elapsed_time = end_time - start_time
file_size = get_file_size_in_mb(output_file)

try:
    slide = OpenSlide(output_file)
    # Successfully opened the file with OpenSlide
    print("File opened successfully with OpenSlide.")
    # Additional code to work with the 'slide' object if needed
except Exception as e:
    # Other unexpected errors
    print(f"An unexpected error occurred: {str(e)}")



vips temp-1: 67072 x 149760 pixels, 8 threads, 67072 x 1 tiles, 256 lines in buffer
vips temp-1: done in 291s          
File opened successfully with OpenSlide.
CPU times: user 5.26 s, sys: 1.83 s, total: 7.08 s
Wall time: 4min 52s


In [53]:
# TODO: analyze runtime, analyze file size, analyze simple pathopatch
# Report the conversion time as MM:SS.ss and the size of the written file.
minutes, seconds = divmod(elapsed_time, 60)
# 05.2f zero-pads the seconds, so e.g. 5.21 s is shown as 04:05.21 rather than 04:5.21.
print(f"Runtime for creating file:    {int(minutes):02d}:{seconds:05.2f}")
print(f"Resulting file size:          {get_file_size_in_mb(output_file):.2f} MB")


Runtime for creating file:    04:52.21
Resulting file size:          2867.02 MB


In [21]:
# Open a previously written jp2k-compressed file (instead of the current
# `output_file`) and print its slide information. In the recorded run this
# raised OpenSlideError: "Unsupported TIFF compression: 33004".
slide = OpenSlide(
    "/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended_vips_256_80_jp2k.tif"
)
slide_info(slide)

OpenSlideError: Unsupported TIFF compression: 33004

## Test PathoPatcher Integration

In [1]:
# Print the command-line usage of the wsi_extraction tool to check that it is available.
!wsi_extraction --help

usage: wsi_extraction [-h] [--wsi_paths WSI_PATHS]
                      [--wsi_filelist WSI_FILELIST]
                      [--output_path OUTPUT_PATH]
                      [--wsi_extension {svs,tiff,tif,bif,scn,ndpi,vms,vmu}]
                      [--config CONFIG] [--patch_size PATCH_SIZE]
                      [--patch_overlap PATCH_OVERLAP]
                      [--target_mpp TARGET_MPP] [--target_mag TARGET_MAG]
                      [--downsample DOWNSAMPLE] [--level LEVEL]
                      [--context_scales [CONTEXT_SCALES ...]]
                      [--check_resolution CHECK_RESOLUTION]
                      [--processes PROCESSES] [--overwrite]
                      [--annotation_paths ANNOTATION_PATHS]
                      [--annotation_extension {json}]
                      [--incomplete_annotations]
                      [--label_map_file LABEL_MAP_FILE]
                      [--save_only_annotated_patches]
                      [--save_context_without_mask]
      

```bash
!wsi_extraction \
    --wsi_paths "/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg.tif" \
    --output_path "/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/PathoPatch_E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg" \
    --wsi_extension "tif" \
    --patch_size 256 \
    --target_mag 10 \
    --overwrite \
    --processes 8 \
    --apply_prefilter \
    --wsi_magnification 40
```

In [5]:
# Very simple PathoPatcher run to check whether patches can be extracted from
# a vips-converted file (here the tile size 256 / quality 95 / jpeg variant).
# NOTE(review): input and output paths are hardcoded absolute paths and do not
# follow `output_file`. The derivation of `pathopatch_outdir` is commented out
# below, although a later cell echoes that variable.
#pathopatch_outdir = Path(output_file).parent / f"PathoPatch_{Path(output_file.stem)}"
#pathopatch_outdir = pathopatch_outdir.resolve()

!wsi_extraction \
    --wsi_paths "/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg.tif" \
    --output_path "/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/PathoPatch_E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg" \
    --wsi_extension "tif" \
    --patch_size 1024 \
    --patch_overlap 6.25 \
    --min_intersection_ratio 0.05 \
    --target_mag 40 \
    --overwrite \
    --processes 8 \
    --apply_prefilter \
    --wsi_magnification 40

[37m2024-01-19 10:11:30,521 [INFO] - Using OpenSlide
[0m[37m2024-01-19 10:11:30,523 [INFO] - Data store directory: /home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/PathoPatch_E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg
[0m[37m2024-01-19 10:11:30,524 [INFO] - Images found: 1
[0m[37m2024-01-19 10:11:30,525 [INFO] - Annotations found: 0
[0m[37m2024-01-19 10:11:30,526 [INFO] - Removing complete dataset! This may take a while.
[0m[37m2024-01-19 10:11:33,021 [INFO] - ***********************************************
[0m[37m2024-01-19 10:11:33,022 [INFO] - 1/1: E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg.tif
[0m[37m2024-01-19 10:11:33,023 [INFO] - Computing patches for E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg.tif
[0m[37m2024-01-19 10:11:33,202 [INFO] - Generate thumbnails
[0m[37m2024-01-19 10:11:43,251 [INFO] - E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg.tif: Processing 502 patches.
[0m[37m2024-01-19 1

In [47]:
# Show the PathoPatcher output directory.
# NOTE(review): `pathopatch_outdir` is not assigned by any active code in this
# notebook, so on a fresh kernel this cell relies on state that no longer exists.
!echo $pathopatch_outdir

/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/PathoPatch_E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg


### Remove files

**1. Remove tiff**

In [7]:
# Ask for confirmation until a valid answer ('y' or 'n', case-insensitive) is given.
prompt = f"Do you want to remove the file: {output_file}? (y/n): "
while True:
    user_input = input(prompt).lower()
    if user_input in ('y', 'n'):
        break
    prompt = "Invalid input. Please enter 'y' or 'n': "

# Delete the converted file only on explicit confirmation.
if user_input != 'y':
    print(f"Skipping removal of {output_file}")
else:
    remove_file(output_file)

Do you want to remove the file: /home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Jpeg_enhanced/E445_24-1A.1_Wholeslide_EnhancedColors_Extended_vips.tif? (y/n):  y


File removed: /home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Jpeg_enhanced/E445_24-1A.1_Wholeslide_EnhancedColors_Extended_vips.tif


**2. Remove PathoPatcher Output**

## Systematic Evaluation

In [8]:

# Function to save VIPS file and measure performance
def save_and_measure_performance(input_file, output_file, tile_size, quality, compression):
    # Start measuring time
    start_time = time.time()

    # VIPS command
    vips_command = f"vips tiffsave {input_file} {output_file} \
        --pyramid \
        --tile \
        --tile-width {tile_size} \
        --tile-height {tile_size} \
        --compression {compression} \
        -Q {quality} \
        --vips-progress \
        --vips-concurrency=4 \
        --vips-cache-max-memory=8000000000"
    
    !{vips_command}

    # Stop measuring time
    end_time = time.time()
    elapsed_time = end_time - start_time

    # Get file size
    file_size = get_file_size_in_mb(output_file)

    # Try to open with OpenSlide
    try:
        slide = OpenSlide(output_file)
        openslide_error = None
    except Exception as e:
        openslide_error = str(e)
        
    #remove_file(output_file)
    
    return {
        'Tile Size': tile_size,
        'Quality': quality,
        'Compression': compression,
        'Elapsed Time (s)': elapsed_time,
        'File Size (MB)': file_size,
        'OpenSlide Error': openslide_error
    }


In [22]:
# Input slide and output location for the systematic run.
input_file = "./input_files/3DHistech/Testscans-Fabian/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended.tif"
input_file_path_obj = Path(input_file)

# Results are written below the VIPS output root, in a sub-folder named after
# the directory that contains the input file.
output_folder = Path("./output_files/VIPS")
output_file_dir = output_folder.joinpath(input_file_path_obj.parent.name)
output_file_dir.mkdir(parents=True, exist_ok=True)

# Parameter grid: every combination is converted once.
tile_sizes = [256]
qualities = [100]  # further candidates: 90, 95, 98
compressions = ["none"]

# One result record (dict) per combination.
results = []

for tile_size in tile_sizes:
    for quality in qualities:
        for compression in compressions:
            # The settings are encoded in the output file name.
            output_name = f"{input_file_path_obj.stem}_vips_{tile_size}_{quality}_{compression}.tif"
            output_file = output_file_dir.joinpath(output_name).resolve()
            result = save_and_measure_performance(
                input_file=input_file,
                output_file=output_file,
                tile_size=tile_size,
                quality=quality,
                compression=compression,
            )
            results.append(result)

# Collect all records in one table and show it.
results_df = pd.DataFrame(results)
print(results_df)

vips ./input_files/3DHistech/Testscans-Fabian/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended.tif: 67072 x 149760 pixels, 4 threads, 67072 x 1 tiles, 128 lines in buffer
vips ./input_files/3DHistech/Testscans-Fabian/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended.tif: done in 527s          
   Tile Size  Quality Compression  Elapsed Time (s)  File Size (MB)  \
0        256      100        none        528.576887    38378.161085   

  OpenSlide Error  
0            None  
