In [2]:
import os
import openslide
import pandas as pd

In [1]:
def get_slide_stats(slide_path):
    """
    Read basic metadata from a single slide file via OpenSlide.

    Parameters
    ----------
    slide_path : str
        Path of the slide file to open.

    Returns
    -------
    dict
        On success, a dict with the keys 'File', 'MPP X', 'MPP Y',
        'Base Magnification', 'Width', 'Height', 'Levels', 'Downsamples',
        'Tile Width', 'Tile Height' and 'Magnifications'. The MPP values are
        passed through exactly as stored in the slide properties (no
        conversion); 'Base Magnification' is a float, or None when the
        objective power is not recorded, in which case every entry of
        'Magnifications' is None as well.
        On failure, a dict with only 'File' and 'Error'.

    Notes
    -----
    The successful result is also printed. Exceptions other than
    `openslide.OpenSlideError` and `ValueError` propagate to the caller.
    """
    file_name = os.path.basename(slide_path)
    try:
        # Using the slide as a context manager guarantees it is closed even
        # when reading or converting the metadata raises.
        with openslide.OpenSlide(slide_path) as slide:
            properties = slide.properties
            mpp_x = properties.get('openslide.mpp-x')
            mpp_y = properties.get('openslide.mpp-y')
            base_magnification = properties.get('openslide.objective-power')

            if base_magnification is not None:
                # The only conversion in this function; a malformed value
                # raises ValueError, handled below.
                base_magnification = float(base_magnification)

            downsample_factors = slide.level_downsamples
            # Effective magnification of each pyramid level; None when the
            # base magnification is unknown.
            magnifications = [
                base_magnification / ds if base_magnification else None
                for ds in downsample_factors
            ]

            slide_stats = {
                'File': file_name,
                'MPP X': mpp_x,
                'MPP Y': mpp_y,
                'Base Magnification': base_magnification,
                'Width': slide.dimensions[0],
                'Height': slide.dimensions[1],
                'Levels': slide.level_count,
                'Downsamples': downsample_factors,
                # NOTE: despite the key names, these are the level-0 image
                # dimensions, not a tile size. Keys kept for compatibility.
                'Tile Width': slide.level_dimensions[0][0],
                'Tile Height': slide.level_dimensions[0][1],
                'Magnifications': magnifications,
            }
    except openslide.OpenSlideError as e:
        return {'File': file_name, 'Error': str(e)}
    except ValueError as e:
        # Raised by float() on the objective power, so report that rather
        # than blaming the MPP fields.
        return {'File': file_name, 'Error': f'Invalid objective-power metadata: {e}'}

    print(slide_stats)
    return slide_stats


def get_all_slides_stats(base_dir):
    """
    Collect statistics for all `.svs` files in the Biospecimen folder of each case.

    Every immediate sub-directory of `base_dir` is treated as a case. Cases
    without a `Biospecimen` directory are reported with a printed message and
    skipped. The `Biospecimen` tree is searched recursively, and the extension
    match is case-insensitive so `.SVS` files are included too.

    Cases, sub-folders and files are visited in sorted order so the returned
    list (and any CSV built from it) is reproducible across runs and machines.

    Parameters
    ----------
    base_dir : str
        Directory containing one sub-directory per case.

    Returns
    -------
    list
        One `get_slide_stats` result per slide file found.
    """
    stats = []
    # os.listdir gives entries in arbitrary order; sort for determinism.
    for case_dir in sorted(os.listdir(base_dir)):
        case_path = os.path.join(base_dir, case_dir)
        if not os.path.isdir(case_path):
            continue

        biospecimen_dir = os.path.join(case_path, 'Biospecimen')
        # isdir (not exists): a plain file named "Biospecimen" holds no slides.
        if not os.path.isdir(biospecimen_dir):
            print(f"No Biospecimen folder for case: {case_dir}")
            continue

        for root, dirs, files in os.walk(biospecimen_dir):
            # Sorting in place fixes the order in which os.walk descends.
            dirs.sort()
            for file in sorted(files):
                if file.lower().endswith('.svs'):
                    slide_path = os.path.join(root, file)
                    stats.append(get_slide_stats(slide_path))
    return stats


def save_stats_to_csv(slide_stats, base_dir):
    """
    Write the collected slide statistics to `slide_statistics.csv` in `base_dir`.

    `slide_stats` is turned into a DataFrame and written without the index
    column; the destination path is printed once the file has been written.
    """
    output_path = os.path.join(base_dir, 'slide_statistics.csv')
    pd.DataFrame(slide_stats).to_csv(output_path, index=False)
    print(f"Slide statistics saved to {output_path}")


In [4]:
# Root folder to scan; each of its sub-directories is treated as one case.
base_dir = 'cases_TEST'

# Gather per-slide metadata (each successful result is also printed), then
# write everything to slide_statistics.csv inside base_dir.
slide_stats = get_all_slides_stats(base_dir)
save_stats_to_csv(slide_stats, base_dir)

{'File': 'TCGA-AA-3848-01A-01-BS1.628aefd8-6784-416f-b410-143efeeeaeff.svs', 'MPP X': '0.50149999999999995', 'MPP Y': '0.50149999999999995', 'Base Magnification': 20.0, 'Width': 32001, 'Height': 30619, 'Levels': 3, 'Downsamples': (1.0, 4.0002584759602815, 16.003125065342395), 'Tile Width': 32001, 'Tile Height': 30619, 'Magnifications': [20.0, 4.9996769259263685, 1.2497559019465234]}
{'File': 'TCGA-AA-3848-01Z-00-DX1.bb018b1c-7748-4865-b00f-42edc35b5047.svs', 'MPP X': '0.23250000000000001', 'MPP Y': '0.23250000000000001', 'Base Magnification': 20.0, 'Width': 56576, 'Height': 51968, 'Levels': 4, 'Downsamples': (1.0, 4.0, 16.0, 32.0), 'Tile Width': 56576, 'Tile Height': 51968, 'Magnifications': [20.0, 5.0, 1.25, 0.625]}
{'File': 'TCGA-AA-3848-01A-01-TS1.196d6712-9850-4eaa-b9e1-3242cf64029e.svs', 'MPP X': '0.50149999999999995', 'MPP Y': '0.50149999999999995', 'Base Magnification': 20.0, 'Width': 18000, 'Height': 18274, 'Levels': 3, 'Downsamples': (1.0, 4.0002189141856395, 8.000437828371279