In [1]:
import openslide
import os
from pathlib import Path
import tifffile


In [2]:
SCN_FILE_PATH = "/media/20TB/augumented-health-system/tnbc/tnbc-max/HnE/batch-01/11_20240731_H-2992_18_HnE_40x.scn"

##### Check the structure of the WSI

In [None]:
def inspect_scn_with_tifffile(file_path):
    try:
        print(f"Inspecting: {file_path}")
        with tifffile.TiffFile(file_path) as tif:
            print(f"Number of pages: {len(tif.pages)}")
            for i, page in enumerate(tif.pages):
                print(f"\nPage {i}:")
                print(f"  Shape: {page.shape}")
                print(f"  Data type: {page.dtype}")
                print(f"  Compression: {page.compression}")
                print(f"  TileWidth: {page.tilewidth}, TileLength: {page.tilelength}")
                print(f"  Photometric: {page.photometric}")
                print(f"  Resolution: X={page.tags.get('XResolution')}, Y={page.tags.get('YResolution')}")
                print(f"  Subfile type: {page.subfiletype}")
            if tif.series:
                print(f"\nNumber of series: {len(tif.series)}")
                for i, series in enumerate(tif.series):
                    print(f"Series {i}:")
                    print(f"  Shape: {series.shape}")
                    print(f"  Levels: {len(series.levels)}")
                    for level, lvl in enumerate(series.levels):
                        print(f"    Level {level}: {lvl.shape}")

    except Exception as e:
        print(f"Error inspecting {file_path}: {e}")

def main():
    if not os.path.exists(SCN_FILE_PATH):
        print(f"File not found: {SCN_FILE_PATH}")
        return
    inspect_scn_with_tifffile(SCN_FILE_PATH)

if __name__ == "__main__":
    main()

Inspecting: /media/20TB/augumented-health-system/tnbc/tnbc-max/HnE/batch-01/11_20240731_H-2992_18_HnE_40x.scn
Number of pages: 17

Page 0:
  Shape: (11758, 5706, 3)
  Data type: uint8
  Compression: 7
  TileWidth: 512, TileLength: 512
  Photometric: 6
  Resolution: X=TiffTag 282 XResolution @19200106004 RATIONAL @19200106016 = (4294967295, 18816, Y=TiffTag 283 YResolution @19200106024 RATIONAL @19200106036 = (4294967295, 18813
  Subfile type: 0

Page 1:
  Shape: (5879, 2853, 3)
  Data type: uint8
  Compression: 7
  TileWidth: 512, TileLength: 512
  Photometric: 6
  Resolution: X=TiffTag 282 XResolution @36135536 RATIONAL @36135548 = (4294967295, 3763293), Y=TiffTag 283 YResolution @36135556 RATIONAL @36135568 = (4294967295, 3762735)
  Subfile type: 0

Page 2:
  Shape: (2939, 1426, 3)
  Data type: uint8
  Compression: 7
  TileWidth: 512, TileLength: 512
  Photometric: 6
  Resolution: X=TiffTag 282 XResolution @36759222 RATIONAL @36759234 = (4294967295, 7529225), Y=TiffTag 283 YResolutio

#### Process a scn file to extract the single highest resolution in tiled format 

In [None]:
import tifffile
import numpy as np
import openslide
from PIL import Image
import os

SCN_FILE_PATH = "/media/20TB/augumented-health-system/tnbc/tnbc-max/HnE/batch-01/11_20240731_H-2992_18_HnE_40x.scn"
OUTPUT_TIFF_PATH = "/media/network/hdd/santosh/tnbc-max_madhura/HnE/11_20240731_H-2992_18_HnE_40x_compatible.tiff"

def extract_full_resolution_image(file_path, output_path):
    try:
        print(f"Processing: {file_path}")
        with tifffile.TiffFile(file_path) as tif:
            # Select Series 1 (highest resolution series)
            series = tif.series[1]  # Series 1 has the full resolution
            full_res_level = series.levels[0]  # Level 0 is the highest resolution

            print(f"Extracting full resolution image with shape: {full_res_level.shape}")

            # Check memory requirements
            img_size_bytes = np.prod(full_res_level.shape) * full_res_level.dtype.itemsize
            print(f"Estimated memory size: {img_size_bytes / (1024**3):.2f} GB")

            # Load the full-resolution image
            full_image = full_res_level.asarray()

            print(f"Saving to {output_path}")
            tifffile.imwrite(
                output_path,
                full_image,
                bigtiff=True,  
                compression='jpeg',  # Optional
                tile=(512, 512),  
                photometric='rgb', 
            )

            print("Extraction complete!")

            # Verify with OpenSlide
            slide = openslide.OpenSlide(output_path)
            print(f"OpenSlide dimensions: {slide.dimensions}")
            print(f"OpenSlide level count: {slide.level_count}")
            print(f"OpenSlide level dimensions: {slide.level_dimensions}")

    except Exception as e:
        print(f"Error processing {file_path}: {e}")

def main():
    if not os.path.exists(SCN_FILE_PATH):
        print(f"File not found: {SCN_FILE_PATH}")
        return

    extract_full_resolution_image(SCN_FILE_PATH, OUTPUT_TIFF_PATH)

if __name__ == "__main__":
    main()

Processing: /media/20TB/augumented-health-system/tnbc/tnbc-max/HnE/batch-01/11_20240731_H-2992_18_HnE_40x.scn
Extracting full resolution image with shape: (193883, 173593, 3)
Estimated memory size: 94.04 GB
Saving to /media/network/hdd/santosh/tnbc-max_madhura/HnE/11_20240731_H-2992_18_HnE_40x_compatible-reso-256_reso-25.tiff
Extraction complete!
OpenSlide dimensions: (173593, 193883)
OpenSlide level count: 1
OpenSlide level dimensions: ((173593, 193883),)


In [5]:
import tifffile
import openslide
import os

# Define the path to the output TIFF file
OUTPUT_TIFF_PATH = "/media/network/hdd/santosh/tnbc-max_madhura/HnE/11_20240731_H-2992_18_HnE_40x_compatible.tiff"

def inspect_tiff_structure(file_path):
    try:
        print(f"Inspecting TIFF file: {file_path}")
        
        # Analyze with tifffile
        with tifffile.TiffFile(file_path) as tif:
            # File size
            file_size_mb = os.path.getsize(file_path) / (1024 ** 2)
            print(f"File size: {file_size_mb:.2f} MB")

            # Number of pages
            print(f"Number of pages: {len(tif.pages)}")

            # Inspect each page
            for i, page in enumerate(tif.pages):
                print(f"\nPage {i}:")
                print(f"  Shape: {page.shape}")
                print(f"  Data type: {page.dtype}")
                print(f"  Compression: {page.compression}")
                print(f"  TileWidth: {page.tilewidth}, TileLength: {page.tilelength}")
                print(f"  Photometric: {page.photometric}")
                print(f"  Resolution: X={page.tags.get('XResolution')}, Y={page.tags.get('YResolution')}")
                print(f"  Subfile type: {page.subfiletype}")

            # Check series (if applicable)
            if tif.series:
                print(f"\nNumber of series: {len(tif.series)}")
                for i, series in enumerate(tif.series):
                    print(f"Series {i}:")
                    print(f"  Shape: {series.shape}")
                    print(f"  Levels: {len(series.levels)}")
                    for level, lvl in enumerate(series.levels):
                        print(f"    Level {level}: {lvl.shape}")

        # Verify with OpenSlide
        print("\nVerifying with OpenSlide:")
        slide = openslide.OpenSlide(file_path)
        print(f"  OpenSlide dimensions: {slide.dimensions}")
        print(f"  OpenSlide level count: {slide.level_count}")
        print(f"  OpenSlide level dimensions: {slide.level_dimensions}")
        print(f"  OpenSlide properties: {dict(slide.properties)}")

    except Exception as e:
        print(f"Error inspecting {file_path}: {e}")

def main():
    if not os.path.exists(OUTPUT_TIFF_PATH):
        print(f"File not found: {OUTPUT_TIFF_PATH}")
        return
    
    inspect_tiff_structure(OUTPUT_TIFF_PATH)

if __name__ == "__main__":
    main()

Inspecting TIFF file: /media/network/hdd/santosh/tnbc-max_madhura/HnE/11_20240731_H-2992_18_HnE_40x_compatible.tiff
File size: 10764.88 MB
Number of pages: 1

Page 0:
  Shape: (193883, 173593, 3)
  Data type: uint8
  Compression: 7
  TileWidth: 512, TileLength: 512
  Photometric: 6
  Resolution: X=TiffTag 282 XResolution @164 RATIONAL @176 = (1, 1), Y=TiffTag 283 YResolution @184 RATIONAL @196 = (1, 1)
  Subfile type: 0

Number of series: 1
Series 0:
  Shape: (193883, 173593, 3)
  Levels: 1
    Level 0: (193883, 173593, 3)

Verifying with OpenSlide:
  OpenSlide dimensions: (173593, 193883)
  OpenSlide level count: 1
  OpenSlide level dimensions: ((173593, 193883),)
  OpenSlide properties: {'openslide.comment': '{"shape": [193883, 173593, 3]}', 'openslide.level-count': '1', 'openslide.level[0].downsample': '1', 'openslide.level[0].height': '193883', 'openslide.level[0].tile-height': '512', 'openslide.level[0].tile-width': '512', 'openslide.level[0].width': '173593', 'openslide.vendor': 