In [3]:
#Not all these packages are required but they are here for completeness
import glob
import io
from ipywidgets import interact
import matplotlib.pyplot as plt
import numpy as np
import os
import openslide
from openslide import open_slide
import pandas as pd
from PIL import Image, ImageOps, ImageChops
import seaborn as sns
import SimpleITK as sitk
from skimage.color import rgb2hed
from skimage.exposure import histogram
from skimage.filters import threshold_otsu
from skimage import morphology
import time
import warnings

In [42]:
# Root directory that holds the .svs whole-slide images.
SLIDES_PATH = '/gpfs1/scratch/90days/s4436005/Slides'
# JPEG output directories, derived from SLIDES_PATH so the root is defined once.
# The empty final component keeps the trailing separator, so plain string
# concatenation with a file name still produces a valid path.
TP53_SAVE_PATH = os.path.join(SLIDES_PATH, 'jpegs', 'TP53', '')
HE_SAVE_PATH = os.path.join(SLIDES_PATH, 'jpegs', 'HandE', '')

In [5]:
# Work from the slide directory: the cells below glob for '*.svs' and open
# slides by bare file name, both of which resolve against the working directory.
os.chdir(SLIDES_PATH)
os.getcwd()  # echo the working directory to confirm the change took effect

'/gpfs1/scratch/90days/s4436005/Slides'

In [6]:
# Find slides in the current working directory.
# glob returns names in arbitrary order, so sort them to get a stable listing.
slides = sorted(glob.glob('*.svs'))
slides

['1792_T_9500_2_TP53.svs',
 '1792_T_9500_3_HandE.svs',
 '1794_T_9504_2_TP53.svs',
 '1794_T_9504_3_HandE.svs',
 '1798_T_9514_2_TP53.svs',
 '1798_T_9514_3_HandE.svs',
 '1820_N_10545A_2_HandE.svs',
 '1820_N_10545A_4_TP53.svs',
 '1820_T_105452F_2_HandE.svs',
 '1820_T_105452F_4_TP53.svs',
 '1843_T_9561_2_TP53.svs',
 '1843_T_9561_3_HandE.svs',
 '1879_T_9580_2_TP53.svs',
 '1879_T_9580_3_HandE.svs',
 '1910_T_9637_2_TP53.svs',
 '1910_T_9637_3_HandE.svs',
 '1915_T_10464_2_TP53.svs',
 '1915_T_10464_3_HandE.svs',
 '1919_T_10779_2_TP53.svs',
 '1919_T_10779_3_HandE.svs',
 '1928_T_9644_2_TP53.svs',
 '1928_T_9644_3_HandE.svs',
 '1932_T_9876_2_TP53.svs',
 '1932_T_9876_3_HandE.svs',
 '1939_T_9661_2_TP53.svs',
 '1939_T_9661_3_HandE.svs',
 '1957_T_9668_2_TP53.svs',
 '1957_T_9668_3_HandE.svs',
 '2011_T_9699_2_TP53.svs',
 '2011_T_9699_3_HandE.svs',
 '2019_T_9717_2_TP53.svs',
 '2019_T_9717_3_HandE.svs',
 '2023_T_9727_2_TP53.svs',
 '2023_T_9727_3_HandE.svs',
 '2036_T_9729_2_TP53.svs',
 '2036_T_9729_3_HandE.sv

In [8]:
# Split the slide list by stain, using the stain tag embedded in each file name.
TP53_slides = list(filter(lambda name: 'TP53' in name, slides))
HE_slides = list(filter(lambda name: 'HandE' in name, slides))

In [11]:
# Display the TP53 subset to check the file-name filter picked up the expected slides.
TP53_slides

['1792_T_9500_2_TP53.svs',
 '1794_T_9504_2_TP53.svs',
 '1798_T_9514_2_TP53.svs',
 '1820_N_10545A_4_TP53.svs',
 '1820_T_105452F_4_TP53.svs',
 '1843_T_9561_2_TP53.svs',
 '1879_T_9580_2_TP53.svs',
 '1910_T_9637_2_TP53.svs',
 '1915_T_10464_2_TP53.svs',
 '1919_T_10779_2_TP53.svs',
 '1928_T_9644_2_TP53.svs',
 '1932_T_9876_2_TP53.svs',
 '1939_T_9661_2_TP53.svs',
 '1957_T_9668_2_TP53.svs',
 '2011_T_9699_2_TP53.svs',
 '2019_T_9717_2_TP53.svs',
 '2023_T_9727_2_TP53.svs',
 '2036_T_9729_2_TP53.svs',
 '2038_T_11986_2_TP53.svs',
 '2065_N_12752A_4_TP53.svs',
 '2065_T_12756A_4_TP53.svs',
 '2075_T_9787_2_TP53.svs',
 '2090_T_10594_2_TP53.svs',
 '2107_T_9806_2_TP53.svs',
 '2112_N_12341B_4_TP53.svs',
 '2112_T_12335F_4_TP53.svs',
 '2148_N_11397A_4_TP53.svs',
 '2148_T_11393A_4_TP53.svs',
 '2171_N_11521A_4_TP53.svs',
 '2171_T_11524A_4_TP53.svs',
 '232_T_13230_2_TP53.svs',
 '2382_T_15360_2_TP53.svs',
 '526_T_15907_2_TP53.svs',
 '526_T_15915_2_TP53.svs',
 '526_T_15923_2_TP53.svs']

In [16]:
def highest_mag(slide):
    """Return the slide's highest magnification as an integer.

    The value is read from the ``aperio.AppMag`` entry of ``slide.properties``
    and converted with ``int``; a missing entry raises ``KeyError``.
    """
    app_mag = slide.properties['aperio.AppMag']
    return int(app_mag)

def level_mags(slide):
    """Return a list with the magnification of every level in the slide.

    Each entry is the highest magnification divided by the matching value in
    ``slide.level_downsamples``, in the same order.
    """
    mags = []
    for factor in slide.level_downsamples:
        mags.append(highest_mag(slide) / factor)
    return mags

def get_level_size(slide, level):
    """Return the dimensions recorded for ``level``.

    This is the ``level``-th entry of ``slide.level_dimensions``.
    """
    sizes = slide.level_dimensions
    return sizes[level]

def get_level_mag(slide, level):
    """Return the magnification of a single level.

    Looks up index ``level`` in the list produced by ``level_mags(slide)``.
    """
    mags = level_mags(slide)
    return mags[level]

def get_level_for_mag(slide, mag):
    """Return the index of the level whose magnification equals ``mag``.

    Level magnifications are rounded to two decimals before comparison.
    Returns the first matching level index, or ``None`` when no level matches.
    """
    rounded_mags = list(np.round(level_mags(slide), decimals=2))
    for idx, level_mag in enumerate(rounded_mags):
        if level_mag == mag:
            return idx
    return None
    
def get_mag_for_size(slide, size):
    """Return the magnification at which the slide would have the given size.

    The downsample factor is the average, over the paired entries of
    ``slide.dimensions`` and ``size``, of full dimension divided by requested
    dimension; the result is the highest magnification divided by that factor.
    """
    full_size = slide.dimensions
    top_mag = highest_mag(slide)
    ratios = [full / target for full, target in zip(full_size, size)]
    return top_mag / np.average(ratios)

def get_size_for_mag(slide, mag):
    """Return the slide dimensions scaled to magnification ``mag``.

    Each entry of ``slide.dimensions`` is divided by the downsample factor
    (highest magnification / ``mag``) and rounded to the nearest integer.

    Returns a list of Python ints, one per entry of ``slide.dimensions``.
    """
    max_size = slide.dimensions
    max_mag = highest_mag(slide)
    downsample = max_mag/mag
    # Built-in int replaces the np.int alias, which NumPy deprecated in 1.20
    # and removed in 1.24 (where it raises AttributeError).
    return [int(np.round(dim/downsample)) for dim in max_size]

def read_slide_at_mag(slide, mag):
    """Read the whole slide as an image at magnification ``mag``.

    If a level exists at exactly ``mag`` (see ``get_level_for_mag``), that
    level is read in full and returned unchanged. Otherwise the level that
    ``slide.get_best_level_for_downsample`` selects for the required
    downsample is read in full and resized with bicubic resampling to the
    size given by ``get_size_for_mag``.
    """
    level = get_level_for_mag(slide, mag)
    if level is None:
        full_size = slide.dimensions
        target_size = tuple(get_size_for_mag(slide, mag))
        # Average the per-axis ratios to get one downsample factor for the target size.
        ratios = [full / target for full, target in zip(full_size, target_size)]
        nearest_level = slide.get_best_level_for_downsample(np.average(ratios))
        nearest_img = slide.read_region(
            (0, 0), nearest_level, get_level_size(slide, nearest_level)
        )
        return nearest_img.resize(target_size, resample=Image.BICUBIC)
    return slide.read_region((0, 0), level, get_level_size(slide, level))

In [44]:
# Export a small JPEG preview of every TP53 slide.
os.makedirs(TP53_SAVE_PATH, exist_ok=True)  # save() fails if the target directory is missing
for slide_name in TP53_slides:
    slide = open_slide(slide_name)
    try:
        # Read the whole slide at 1x magnification.
        slide_pil = read_slide_at_mag(slide, 1)
    finally:
        # Release the slide handle even if reading fails; otherwise one
        # handle per slide stays open for the life of the kernel.
        slide.close()
    slide_pil.thumbnail((1000,1000))  # shrink in place to fit within 1000x1000
    slide_pil_name = os.path.splitext(slide_name)[0] + '_small.jpeg'
    # Convert to RGB first: JPEG cannot store an alpha channel.
    slide_pil.convert('RGB').save(os.path.join(TP53_SAVE_PATH, slide_pil_name), 'JPEG')
    print(slide_pil_name)

1792_T_9500_2_TP53_small.jpeg
1794_T_9504_2_TP53_small.jpeg
1798_T_9514_2_TP53_small.jpeg
1820_N_10545A_4_TP53_small.jpeg
1820_T_105452F_4_TP53_small.jpeg
1843_T_9561_2_TP53_small.jpeg
1879_T_9580_2_TP53_small.jpeg
1910_T_9637_2_TP53_small.jpeg
1915_T_10464_2_TP53_small.jpeg
1919_T_10779_2_TP53_small.jpeg
1928_T_9644_2_TP53_small.jpeg
1932_T_9876_2_TP53_small.jpeg
1939_T_9661_2_TP53_small.jpeg
1957_T_9668_2_TP53_small.jpeg
2011_T_9699_2_TP53_small.jpeg
2019_T_9717_2_TP53_small.jpeg
2023_T_9727_2_TP53_small.jpeg
2036_T_9729_2_TP53_small.jpeg
2038_T_11986_2_TP53_small.jpeg
2065_N_12752A_4_TP53_small.jpeg
2065_T_12756A_4_TP53_small.jpeg
2075_T_9787_2_TP53_small.jpeg
2090_T_10594_2_TP53_small.jpeg
2107_T_9806_2_TP53_small.jpeg
2112_N_12341B_4_TP53_small.jpeg
2112_T_12335F_4_TP53_small.jpeg
2148_N_11397A_4_TP53_small.jpeg
2148_T_11393A_4_TP53_small.jpeg
2171_N_11521A_4_TP53_small.jpeg
2171_T_11524A_4_TP53_small.jpeg
232_T_13230_2_TP53_small.jpeg
2382_T_15360_2_TP53_small.jpeg
526_T_15907_2_

In [45]:
# Export a small JPEG preview of every H&E slide.
os.makedirs(HE_SAVE_PATH, exist_ok=True)  # save() fails if the target directory is missing
for slide_name in HE_slides:
    slide = open_slide(slide_name)
    try:
        # Read the whole slide at 1x magnification.
        slide_pil = read_slide_at_mag(slide, 1)
    finally:
        # Release the slide handle even if reading fails; otherwise one
        # handle per slide stays open for the life of the kernel.
        slide.close()
    slide_pil.thumbnail((1000,1000))  # shrink in place to fit within 1000x1000
    slide_pil_name = os.path.splitext(slide_name)[0] + '_small.jpeg'
    # Convert to RGB first: JPEG cannot store an alpha channel.
    slide_pil.convert('RGB').save(os.path.join(HE_SAVE_PATH, slide_pil_name), 'JPEG')
    print(slide_pil_name)

1792_T_9500_3_HandE_small.jpeg
1794_T_9504_3_HandE_small.jpeg
1798_T_9514_3_HandE_small.jpeg
1820_N_10545A_2_HandE_small.jpeg
1820_T_105452F_2_HandE_small.jpeg
1843_T_9561_3_HandE_small.jpeg
1879_T_9580_3_HandE_small.jpeg
1910_T_9637_3_HandE_small.jpeg
1915_T_10464_3_HandE_small.jpeg
1919_T_10779_3_HandE_small.jpeg
1928_T_9644_3_HandE_small.jpeg
1932_T_9876_3_HandE_small.jpeg
1939_T_9661_3_HandE_small.jpeg
1957_T_9668_3_HandE_small.jpeg
2011_T_9699_3_HandE_small.jpeg
2019_T_9717_3_HandE_small.jpeg
2023_T_9727_3_HandE_small.jpeg
2036_T_9729_3_HandE_small.jpeg
2038_T_11986_3_HandE_small.jpeg
2065_N_12752A_2_HandE_small.jpeg
2065_T_12756A_2_HandE_small.jpeg
2075_T_9787_3_HandE_small.jpeg
2090_T_10594_3_HandE_small.jpeg
2107_T_9806_3_HandE_small.jpeg
2112_N_12341B_2_HandE_small.jpeg
2112_T_12335F_2_HandE_small.jpeg
2148_N_11397A_2_HandE_small.jpeg
2148_T_11393A_2_HandE_small.jpeg
2171_N_11521A_2_HandE_small.jpeg
2171_T_11524A_2_HandE_small.jpeg
232_T_13230_3_HandE_small.jpeg
2382_T_15360_3