In [20]:
# Install the OpenSlide C library and Python bindings
!apt-get install openslide-tools
!pip install openslide-python

Reading package lists... Done
Building dependency tree       
Reading state information... Done
openslide-tools is already the newest version (3.4.1+dfsg-2).
The following package was automatically installed and is no longer required:
  libnvidia-common-410
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 6 not upgraded.


## Mount Google Drive

**If you get a timeout error, try mounting the drive with a non-Columbia Google account. Note that in this case the slides directory must also be shared with that non-Columbia Google account.**

In [21]:
from google.colab import drive
# Directory under which Google Drive is made available in the notebook.
GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


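If the drive is mounted successfully, the cell will output "Mounted at /content/gdrive" (or "Drive already mounted at /content/gdrive" if it was mounted earlier in the session).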

Go to left sidebar -> files -> .. -> content -> gdrive -> My Drive -> slides

to visually check that the data has been mounted.

In [22]:
import os

# Root of the mounted Drive and the folder inside it that holds the slides.
MY_DRIVE_DIR = '/content/gdrive/My Drive'
SLIDES_DIR = os.path.join(MY_DRIVE_DIR, 'slides')

# Show each directory followed by whether it exists, one value per line.
print(MY_DRIVE_DIR,
      os.path.exists(MY_DRIVE_DIR),
      SLIDES_DIR,
      os.path.exists(SLIDES_DIR),
      sep='\n')

/content/gdrive/My Drive
True
/content/gdrive/My Drive/slides
True


In [23]:
# get slide nums.

# Slide numbers left out because their mask.tif files are missing.
UNUSED = set(['038', '099'])

def get_num_from_fname(fname):
    ''' Return the slide number embedded in a slide file name.

    Works for tissue.tif, mask.tif, and .xml files, e.g.
    'tumor_001.tif' and 'tumor_001_mask.tif' both give '001'.

    Args:
        fname: file name (no directory part).

    Returns:
        Characters 6-8 (zero-based) of the part of fname before its first
        dot; shorter than three characters if that part is too short.
    '''
    # Split on the first dot only: unpacking fname.split('.') into exactly two
    # values raised ValueError for names with no dot or with several dots.
    name = fname.split('.', 1)[0]
    return name[6:9]

# Distinct slide numbers of the files in SLIDES_DIR, minus the unusable ones,
# kept as a sorted list.
SLIDE_NUMS = sorted(
    {get_num_from_fname(fname) for fname in os.listdir(SLIDES_DIR)} - UNUSED
)

print(len(SLIDE_NUMS), 'slide nums:', SLIDE_NUMS)

21 slide nums: ['001', '002', '005', '012', '016', '019', '023', '031', '035', '057', '059', '064', '075', '078', '081', '084', '091', '094', '096', '101', '110']


## Verify slides as per starter-code.ipynb

In [0]:
from openslide import open_slide, __library_version__ as openslide_version

In [0]:
def verify_slide(slide_path, tumor_mask_path, max_levels=8):
  ''' Open a slide and its tumor mask and check that their levels agree.

  See cell 4 in starter-code.ipynb.

  Args:
    slide_path: path of the slide image, passed to open_slide.
    tumor_mask_path: path of the matching tumor mask, passed to open_slide.
    max_levels: upper bound on the number of levels that are checked
      (default 8). Fewer are checked when the slide has fewer levels.

  Raises:
    AssertionError: if the mask lacks one of the checked levels, if a checked
      level has different dimensions in the slide and in the mask, or if the
      last checked level does not scale back to the level-0 dimensions.
  '''
  # The context managers close both files even when a check fails.
  with open_slide(slide_path) as slide:
    print("Read WSI from %s with width: %d, height: %d" % (slide_path,
                                                           slide.level_dimensions[0][0],
                                                           slide.level_dimensions[0][1]))

    with open_slide(tumor_mask_path) as tumor_mask:
      print("Read tumor mask from %s" % (tumor_mask_path))
      _verify_levels(slide, tumor_mask, max_levels)

  print('\n\n')


def _verify_levels(slide, tumor_mask, max_levels):
  ''' Print each checked level and assert slide and mask dimensions match. '''
  slide_dims = slide.level_dimensions
  mask_dims = tumor_mask.level_dimensions

  # The count must be formatted into the string; passing it as a second
  # print argument printed a literal "%d".
  print("Slide includes %d levels" % len(slide_dims))

  # Never index past the levels the slide actually has.
  num_levels = max(0, min(max_levels, len(slide_dims)))
  assert len(mask_dims) >= num_levels, (
      "tumor mask has %d levels, expected at least %d" % (len(mask_dims), num_levels))

  for i in range(num_levels):
    print("Level %d, dimensions: %s downsample factor %d" % (i,
                                                             slide_dims[i],
                                                             slide.level_downsamples[i]))
    assert mask_dims[i][0] == slide_dims[i][0], (
        "level %d: mask width differs from slide width" % i)
    assert mask_dims[i][1] == slide_dims[i][1], (
        "level %d: mask height differs from slide height" % i)

  # Verify downsampling works as expected, using the last level checked
  # (level 7 with the default max_levels on a slide with at least 8 levels).
  if num_levels:
    last = num_levels - 1
    width, height = slide_dims[last]
    assert width * slide.level_downsamples[last] == slide_dims[0][0], (
        "level %d width does not scale back to the level-0 width" % last)
    assert height * slide.level_downsamples[last] == slide_dims[0][1], (
        "level %d height does not scale back to the level-0 height" % last)

In [26]:
# Check every slide number against its tumor mask.
for num in SLIDE_NUMS:
  # Slide and mask share the same path up to the file-name suffix.
  slide_stem = os.path.join(SLIDES_DIR, 'tumor_' + num)
  slide_path = slide_stem + '.tif'
  tumor_mask_path = slide_stem + '_mask.tif'
  verify_slide(slide_path=slide_path, tumor_mask_path=tumor_mask_path)

Read WSI from /content/gdrive/My Drive/slides/tumor_001.tif with width: 97792, height: 221184
Read tumor mask from /content/gdrive/My Drive/slides/tumor_001_mask.tif
Slide includes %d levels 10
Level 0, dimensions: (97792, 221184) downsample factor 1
Level 1, dimensions: (48896, 110592) downsample factor 2
Level 2, dimensions: (24448, 55296) downsample factor 4
Level 3, dimensions: (12224, 27648) downsample factor 8
Level 4, dimensions: (6112, 13824) downsample factor 16
Level 5, dimensions: (3056, 6912) downsample factor 32
Level 6, dimensions: (1528, 3456) downsample factor 64
Level 7, dimensions: (764, 1728) downsample factor 128



Read WSI from /content/gdrive/My Drive/slides/tumor_002.tif with width: 97792, height: 219648
Read tumor mask from /content/gdrive/My Drive/slides/tumor_002_mask.tif
Slide includes %d levels 10
Level 0, dimensions: (97792, 219648) downsample factor 1
Level 1, dimensions: (48896, 109824) downsample factor 2
Level 2, dimensions: (24448, 54912) downsample f