## VIPS Benchmarking for 3DHistech TIFF Files

Command structure: 

```
vips tiffsave input_file output_file.tif \
    --compression COMPRESSION \
    --Q QUALITY \
    --pyramid \
    --tile \
    --tile-width TILE_WIDTH \
    --tile-height TILE_HEIGHT
```

Selection of important options:
```bash
Operation
--compression=VipsForeignTiffCompression     Compression for this file (Options are: none, jpeg, deflate, packbits, ccittfax4, lzw, webp, zstd, jp2k)
-Q, --Q=gint                                 Q factor (Value between 0 and 100)
--tile-width=gint                            Tile width in pixels
--tile-height=gint                           Tile height in pixels
--pyramid                                    Write a pyramidal tiff
--properties                                 Write a properties document to IMAGEDESCRIPTION
-d, --depth=VipsForeignDzDepth               Pyramid depth

VIPS Application Options
--vips-concurrency=N                         evaluate with N concurrent threads
--vips-progress                              show progress feedback
--vips-cache-max=N                           cache at most N operations
--vips-cache-max-memory=N                    cache at most N bytes in memory
--vips-cache-max-files=N                     allow at most N open files
```

## Test VIPS for 3DHistech Setup

In [None]:
# imports
from pathlib import Path
import time

from openslide import OpenSlide
import pandas as pd

from utils.utils import get_file_size_in_mb, slide_info, remove_file

# Source slide to convert (path relative to the notebook's working directory).
# Alternative test scan:
# input_file = "./input_files/3DHistech/Testscans-Fabian/E445_24-1A.1_Big_Jpeg_enhanced/E445_24-1A.1_Wholeslide_EnhancedColors_Extended.tif"
input_file = "./input_files/3DHistech/Testscans-Fabian/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended.tif"
input_file_path_obj = Path(input_file)

# Root folder for all VIPS conversions.
output_folder = Path("./output_files/VIPS")

# Converted files go into a sub-folder named after the input file's parent directory.
output_file_dir = output_folder.joinpath(input_file_path_obj.parent.name)
output_file_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Parameters for a single manual conversion run.
# Candidate values to try:
#   tile size:   256, 512, 1024
#   compression: none, jpeg, jp2k
#   quality:     100, 95, 90, 80
tile_size, quality, compression = 256, 98, "jpeg"

# The output file name encodes the chosen tile size, quality and compression.
output_name = f"{input_file_path_obj.stem}_vips_{tile_size}_{quality}_{compression}.tif"
output_file = output_file_dir.joinpath(output_name).resolve()

print(output_file)

In [None]:
%%time
# Convert the input slide to a tiled, pyramidal TIFF with vips and record the wall-clock time
# of the conversion in `elapsed_time` (seconds).
start_time = time.time()

# The $name placeholders below (input_file, output_file, tile_size, compression, quality)
# are filled in from the notebook variables of the same name.
!vips tiffsave $input_file $output_file \
    --pyramid \
    --tile \
    --tile-width $tile_size \
    --tile-height $tile_size \
    --compression $compression \
    -Q $quality \
    --vips-progress \
    --vips-concurrency=8 \
    --vips-cache-max-memory=8000000000

# Timing stops here, so the file-size lookup and the OpenSlide check are not included.
end_time = time.time()
elapsed_time = end_time - start_time
file_size = get_file_size_in_mb(output_file)

# Sanity check: confirm the written file can be opened by OpenSlide.
# NOTE(review): the slide handle opened here is not closed in this cell.
try:
    slide = OpenSlide(output_file)
    # Successfully opened the file with OpenSlide
    print("File opened successfully with OpenSlide.")
    # Additional code to work with the 'slide' object if needed
except Exception as e:
    # Any failure to open the file is reported here instead of aborting the cell
    print(f"An unexpected error occurred: {str(e)}")



In [None]:
# todo: analyze runtime, analyze file size, analyze simple pathopatch
# Split the measured runtime (seconds) into whole minutes and the remaining seconds.
minutes, seconds = divmod(elapsed_time, 60)
# Zero-pad the seconds to width 5 (e.g. "05.30") so the output reads as MM:SS.ss
# instead of e.g. "01:5.30".
print(f"Runtime for creating file:    {int(minutes):02d}:{seconds:05.2f}")
print(f"Resulting file size:          {get_file_size_in_mb(output_file):.2f} MB")


In [None]:
# Print the slide information for an already converted file. The path is hard-coded;
# per its file name it is the 256 / 80 / jp2k conversion.
# To inspect the file written by the conversion cell instead, use:
#slide = OpenSlide(output_file)
converted_slide_path = "/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended_vips_256_80_jp2k.tif"
slide = OpenSlide(converted_slide_path)
slide_info(slide)

## Test PathoPatcher Integration

In [None]:
# Show the command-line options of the `wsi_extraction` tool used in the cells below.
!wsi_extraction --help

```bash
!wsi_extraction \
    --wsi_paths "/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg.tif" \
    --output_path "/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/PathoPatch_E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg" \
    --wsi_extension "tif" \
    --patch_size 256 \
    --target_mag 10 \
    --overwrite \
    --processes 8 \
    --apply_prefilter \
    --wsi_magnification 40
```

In [None]:
# very simple pathopatcher command to check if patches can be extracted
# NOTE(review): the slide path and output path passed below are hard-coded absolute paths
# (per the file name, the 256 / 95 / jpeg conversion). The two commented-out lines would
# derive the output directory from `output_file` instead.
#pathopatch_outdir = Path(output_file).parent / f"PathoPatch_{Path(output_file.stem)}"
#pathopatch_outdir = pathopatch_outdir.resolve()

!wsi_extraction \
    --wsi_paths "/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg.tif" \
    --output_path "/home/jovyan/work/output_files/VIPS/E445_24-1A.1_Big_Loss8_raw/PathoPatch_E445_24-1A.1_Wholeslide_Default_Extended_vips_256_95_jpeg" \
    --wsi_extension "tif" \
    --patch_size 1024 \
    --patch_overlap 6.25 \
    --min_intersection_ratio 0.05 \
    --target_mag 40 \
    --overwrite \
    --processes 8 \
    --apply_prefilter \
    --wsi_magnification 40

In [None]:
# Print the PathoPatcher output directory.
# NOTE(review): `pathopatch_outdir` is only assigned in commented-out code in the
# extraction cell; confirm it is defined before relying on this output.
!echo $pathopatch_outdir

### Remove files

**1. Remove tiff**

In [None]:
# Ask for confirmation before deleting the converted TIFF; keep asking until the
# (lower-cased) answer is exactly 'y' or 'n'.
prompt_text = f"Do you want to remove the file: {output_file}? (y/n): "
while True:
    user_input = input(prompt_text).lower()
    if user_input in ('y', 'n'):
        break
    # Every prompt after the first one reports the invalid answer.
    prompt_text = "Invalid input. Please enter 'y' or 'n': "

if user_input == 'n':
    print(f"Skipping removal of {output_file}")
else:
    remove_file(output_file)

**2. Remove PathoPatcher Output**

## Systematic Evaluation

In [None]:

# Function to save VIPS file and measure performance
def save_and_measure_performance(input_file, output_file, tile_size, quality, compression):
    # Start measuring time
    start_time = time.time()

    # VIPS command
    vips_command = f"vips tiffsave {input_file} {output_file} \
        --pyramid \
        --tile \
        --tile-width {tile_size} \
        --tile-height {tile_size} \
        --compression {compression} \
        -Q {quality} \
        --vips-progress \
        --vips-concurrency=4 \
        --vips-cache-max-memory=8000000000"
    
    !{vips_command}

    # Stop measuring time
    end_time = time.time()
    elapsed_time = end_time - start_time

    # Get file size
    file_size = get_file_size_in_mb(output_file)

    # Try to open with OpenSlide
    try:
        slide = OpenSlide(output_file)
        openslide_error = None
    except Exception as e:
        openslide_error = str(e)
        
    #remove_file(output_file)
    
    return {
        'Tile Size': tile_size,
        'Quality': quality,
        'Compression': compression,
        'Elapsed Time (s)': elapsed_time,
        'File Size (MB)': file_size,
        'OpenSlide Error': openslide_error
    }


In [None]:
# Slide used for the systematic evaluation and the root folder for converted files.
input_file = "./input_files/3DHistech/Testscans-Fabian/E445_24-1A.1_Big_Loss8_raw/E445_24-1A.1_Wholeslide_Default_Extended.tif"
input_file_path_obj = Path(input_file)
output_folder = Path("./output_files/VIPS")

# Converted files go into a sub-folder named after the input file's parent directory.
output_file_dir = output_folder.joinpath(input_file_path_obj.parent.name)
output_file_dir.mkdir(parents=True, exist_ok=True)

# Parameter grid: every combination of the three lists is converted once.
tile_sizes = [256]
qualities = [100]#, 90, 95, 98, 100]
compressions = ["none"]

# One result dict per converted file.
results = []
for tile_size in tile_sizes:
    for quality in qualities:
        for compression in compressions:
            # The output file name encodes the parameter combination.
            output_name = f"{input_file_path_obj.stem}_vips_{tile_size}_{quality}_{compression}.tif"
            output_file = (output_file_dir / output_name).resolve()
            results.append(
                save_and_measure_performance(input_file, output_file, tile_size, quality, compression)
            )

# Collect the measurements in a table and show it.
results_df = pd.DataFrame(results)
print(results_df)