In [2]:
import os
import openslide
import pandas as pd

In [3]:
def get_slide_stats(slide_path):
    """
    Read basic metadata from a whole-slide image and return it as a flat dict.

    Parameters
    ----------
    slide_path : str
        Path to a slide file that OpenSlide can open.

    Returns
    -------
    dict
        On success, a dict with the keys 'File', 'MPP X', 'MPP Y',
        'Base Magnification', 'Width', 'Height', 'Levels', 'Downsamples',
        'Tile Width', 'Tile Height' and 'Magnifications'.
        On failure, ``{'File': <basename>, 'Error': <message>}``; no exception
        is raised for ``openslide.OpenSlideError`` or ``ValueError``.

    Notes
    -----
    * 'MPP X' / 'MPP Y' are passed through exactly as stored in the slide
      properties (strings, or None when the property is absent).
    * 'Tile Width' / 'Tile Height' are taken from ``level_dimensions[0]`` and
      therefore equal 'Width' / 'Height'; the key names are kept so existing
      CSV consumers keep working.
    * The collected dict is printed before it is returned.
    """
    file_name = os.path.basename(slide_path)
    try:
        # The context manager closes the slide on every exit path, including
        # the error paths handled below (previously the handle leaked there).
        with openslide.OpenSlide(slide_path) as slide:
            mpp_x = slide.properties.get('openslide.mpp-x')
            mpp_y = slide.properties.get('openslide.mpp-y')
            base_magnification = slide.properties.get('openslide.objective-power')

            if base_magnification is not None:
                # Properties are strings; a non-numeric value raises ValueError.
                base_magnification = float(base_magnification)

            downsample_factors = slide.level_downsamples
            # Per-level magnification = base magnification / level downsample.
            # Without a (non-zero) base magnification every level maps to None.
            magnifications = [
                base_magnification / ds if base_magnification else None
                for ds in downsample_factors
            ]

            slide_stats = {
                'File': file_name,
                'MPP X': mpp_x,
                'MPP Y': mpp_y,
                'Base Magnification': base_magnification,
                'Width': slide.dimensions[0],
                'Height': slide.dimensions[1],
                'Levels': slide.level_count,
                'Downsamples': downsample_factors,
                # Level-0 dimensions, i.e. identical to Width/Height above.
                'Tile Width': slide.level_dimensions[0][0],
                'Tile Height': slide.level_dimensions[0][1],
                'Magnifications': magnifications,
            }
    except openslide.OpenSlideError as e:
        return {'File': file_name, 'Error': str(e)}
    except ValueError as e:
        # The only ValueError source in this function is the float() conversion
        # of the objective-power property, so report that rather than MPP.
        return {'File': file_name, 'Error': f'Invalid objective-power metadata: {e}'}

    print(slide_stats)
    return slide_stats


def get_all_slides_stats(base_dir):
    """
    Collect statistics for all `.svs` files in the Biospecimen folder of each case.

    Every sub-directory of ``base_dir`` is treated as one case. Inside each
    case, the ``Biospecimen`` directory is walked recursively and
    ``get_slide_stats`` is called for every file whose name ends in ``.svs``.

    Parameters
    ----------
    base_dir : str
        Directory that contains one sub-directory per case.

    Returns
    -------
    list
        One ``get_slide_stats`` result per slide found, ordered by case name,
        then by walk order with directories and file names sorted, so repeated
        runs over the same tree produce the same row order.

    Notes
    -----
    Cases without a ``Biospecimen`` directory are reported on stdout and
    skipped. Non-directory entries directly under ``base_dir`` are ignored.
    """
    stats = []
    # os.listdir returns entries in arbitrary order; sort for reproducible output.
    for case_dir in sorted(os.listdir(base_dir)):
        case_path = os.path.join(base_dir, case_dir)
        if not os.path.isdir(case_path):
            continue

        biospecimen_dir = os.path.join(case_path, 'Biospecimen')
        # isdir (not exists): a plain file named "Biospecimen" cannot be walked
        # and should be reported like a missing folder.
        if not os.path.isdir(biospecimen_dir):
            print(f"No Biospecimen folder for case: {case_dir}")
            continue

        for root, dirs, files in os.walk(biospecimen_dir):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()
            for file in sorted(files):
                if file.endswith('.svs'):
                    slide_path = os.path.join(root, file)
                    stats.append(get_slide_stats(slide_path))
    return stats


def save_stats_to_csv(slide_stats, base_dir):
    """
    Write the collected slide statistics to ``slide_statistics.csv`` in ``base_dir``.

    Parameters
    ----------
    slide_stats : list of dict
        Records to tabulate; each dict becomes one row and each key a column.
    base_dir : str
        Directory in which the CSV file is created.

    The DataFrame index is not written. The output path is printed once the
    file has been saved.
    """
    csv_path = os.path.join(base_dir, 'slide_statistics.csv')
    pd.DataFrame(slide_stats).to_csv(csv_path, index=False)
    print(f"Slide statistics saved to {csv_path}")


In [4]:
# Root folder holding one sub-directory per case; a relative path, so it is
# resolved against the kernel's current working directory.
base_dir = 'cases_TEST'

# Gather per-slide metadata for every case, then write it to
# <base_dir>/slide_statistics.csv.
slide_stats = get_all_slides_stats(base_dir)
save_stats_to_csv(slide_stats, base_dir)

{'File': 'TCGA-AD-6895-01Z-00-DX1.7FB1FBC6-683B-4285-89D1-A7A20F07A9D4.svs', 'MPP X': '0.252', 'MPP Y': '0.252', 'Base Magnification': 40.0, 'Width': 22062, 'Height': 14792, 'Levels': 3, 'Downsamples': (1.0, 4.000181323662738, 8.001088139281828), 'Tile Width': 22062, 'Tile Height': 14792, 'Magnifications': [40.0, 9.999546711391142, 4.999320005439957]}
{'File': 'TCGA-AD-6895-01A-01-TS1.60e5a7e9-6a8d-47ae-8094-7429f03b42b3.svs', 'MPP X': '0.2462', 'MPP Y': '0.2462', 'Base Magnification': 40.0, 'Width': 26656, 'Height': 24435, 'Levels': 3, 'Downsamples': (1.0, 4.00024557956778, 16.00098231827112), 'Tile Width': 26656, 'Tile Height': 24435, 'Magnifications': [40.0, 9.999386088771564, 2.499846522192891]}
{'File': 'TCGA-AA-3552-01Z-00-DX1.84133d42-9a39-44b5-a1ec-a5382650c939.svs', 'MPP X': '0.23250000000000001', 'MPP Y': '0.23250000000000001', 'Base Magnification': 20.0, 'Width': 57600, 'Height': 47872, 'Levels': 3, 'Downsamples': (1.0, 4.0, 16.0), 'Tile Width': 57600, 'Tile Height': 47872, 