In [6]:
import os

os.add_dll_directory("C:/Users/davet/Documents/openslide-bin-4.0.0.6-windows-x64/bin")
import openslide
import pandas as pd

In [7]:
def get_slide_stats(slide_path):
    try:
        slide = openslide.OpenSlide(slide_path)
        mpp_x = slide.properties.get('openslide.mpp-x')
        mpp_y = slide.properties.get('openslide.mpp-y')
        base_magnification = slide.properties.get('openslide.objective-power')

        if base_magnification is not None:
            base_magnification = float(base_magnification)

        downsample_factors = slide.level_downsamples
        magnifications = [base_magnification / ds if base_magnification else None for ds in downsample_factors]

        slide_stats = {
            'File': os.path.basename(slide_path),
            'MPP X': mpp_x,
            'MPP Y': mpp_y,
            'Base Magnification': base_magnification,
            'Width': slide.dimensions[0],
            'Height': slide.dimensions[1],
            'Levels': slide.level_count,
            'Downsamples': downsample_factors,
            'Tile Width': slide.level_dimensions[0][0],
            'Tile Height': slide.level_dimensions[0][1],
            'Magnifications': magnifications,
        }
        slide.close()
        print(slide_stats)
        return slide_stats
    except openslide.OpenSlideError as e:
        return {'File': os.path.basename(slide_path), 'Error': str(e)}
    except ValueError as e:
        return {'File': os.path.basename(slide_path), 'Error': 'MPP metadata is missing'}


def get_all_slides_stats(base_dir):
    """
    Collect statistics for all `.svs` files in the Biospecimen folder of each case.
    """
    stats = []
    for case_dir in os.listdir(base_dir):
        case_path = os.path.join(base_dir, case_dir)
        if not os.path.isdir(case_path):
            continue

        biospecimen_dir = os.path.join(case_path, 'Biospecimen')
        if not os.path.exists(biospecimen_dir):
            print(f"No Biospecimen folder for case: {case_dir}")
            continue

        for root, _, files in os.walk(biospecimen_dir):
            for file in files:
                if file.endswith('.svs'):
                    slide_path = os.path.join(root, file)
                    stats.append(get_slide_stats(slide_path))
    return stats


def save_stats_to_csv(slide_stats, base_dir):
    """
    Save slide statistics to a CSV file in the base directory.
    """
    stats_file = os.path.join(base_dir, 'slide_statistics.csv')
    df = pd.DataFrame(slide_stats)
    df.to_csv(stats_file, index=False)
    print(f"Slide statistics saved to {stats_file}")


In [9]:
base_dir = 'cases_TEST_TRAIN_100'

slide_stats = get_all_slides_stats(base_dir)
save_stats_to_csv(slide_stats, base_dir)

{'File': 'TCGA-AA-3524-01A-01-BS1.26bb278f-a41e-43be-be0b-a16a78d23272.svs', 'MPP X': '0.50149999999999995', 'MPP Y': '0.50149999999999995', 'Base Magnification': 20.0, 'Width': 12000, 'Height': 10441, 'Levels': 2, 'Downsamples': (1.0, 4.000191570881226), 'Tile Width': 12000, 'Tile Height': 10441, 'Magnifications': [20.0, 4.999760547866481]}
{'File': 'TCGA-AA-3524-01A-02-BS2.5ec2a6e7-96ad-4e7e-ab9f-92e9f30df438.svs', 'MPP X': '0.50149999999999995', 'MPP Y': '0.50149999999999995', 'Base Magnification': 20.0, 'Width': 10000, 'Height': 12087, 'Levels': 2, 'Downsamples': (1.0, 4.000496524329693), 'Tile Width': 10000, 'Tile Height': 12087, 'Magnifications': [20.0, 4.99937942162095]}
{'File': 'TCGA-AA-3524-01Z-00-DX1.b1aae264-87be-4514-8f9d-25660b39caa7.svs', 'MPP X': '0.23250000000000001', 'MPP Y': '0.23250000000000001', 'Base Magnification': 20.0, 'Width': 49920, 'Height': 49920, 'Levels': 4, 'Downsamples': (1.0, 4.0, 16.0, 32.0), 'Tile Width': 49920, 'Tile Height': 49920, 'Magnifications'