# Mask Thumbnail Dictionary Creator

## Imports

In [1]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
import openslide
import os
import pandas as pd
from torchvision import transforms

## Create Thumbnail Dictionary

In [2]:
# Input patch dimensions, in pixels
PATCH_WIDTH = 256
PATCH_HEIGHT = 256

# Root directory of the dataset
DATA_PATH = '../../ganz/data/panda_dataset'

# Directory that holds the label-mask files
mask_dir = f'{DATA_PATH}/train_label_masks'

# Training labels table, indexed by the image_id column
train = pd.read_csv(f'{DATA_PATH}/train.csv', index_col='image_id')

# Whole-slide image names, taken from the table index
wsi_names = train.index.tolist()

In [3]:
# Maps each WSI name to the first channel of its down-scaled mask thumbnail
thumbnails = {}

# ToTensor turns an (H, W, C) uint8 array into a (C, H, W) float tensor.
# NOTE(review): ToTensor rescales uint8 input to [0, 1], so the stored values
# are the thumbnail pixel values divided by 255 -- confirm consumers expect this.
transformations = transforms.Compose([transforms.ToTensor()])

# Build one thumbnail per WSI mask and store it in the dictionary
for wsi_name in wsi_names:
    mask_path = os.path.join(mask_dir, f'{wsi_name}_mask.tiff')
    # Slides that have no mask file on disk are skipped
    if not os.path.exists(mask_path):
        continue

    # Use the slide as a context manager so its handle is closed as soon as
    # the thumbnail has been read; otherwise every slide stays open for the
    # remainder of the loop.
    with openslide.OpenSlide(mask_path) as mask:
        width, height = mask.dimensions
        # Thumbnail bound of one pixel per patch along each axis. The size is
        # a pixel count, so use integer division, and keep at least one pixel
        # so slides smaller than a single patch do not yield a zero size.
        thumb_size = (
            max(1, width // PATCH_WIDTH),
            max(1, height // PATCH_HEIGHT),
        )
        thumbnail = mask.get_thumbnail(thumb_size).convert('RGB')

    thumbnail = np.asarray(thumbnail, dtype=np.uint8)
    thumbnail = transformations(thumbnail)
    # Keep only channel 0 of the (C, H, W) tensor
    thumbnails[wsi_name] = thumbnail[0]

In [None]:
# # Convert tensors to arrays if needed
# for wsi_name in thumbnails:
#     thumbnails[wsi_name] = thumbnails[wsi_name].numpy()

In [4]:
# Serialize the thumbnail dictionary to a pickle file.
# Create the output directory first so open() does not fail when ./data
# does not exist yet.
os.makedirs('./data', exist_ok=True)
with open('./data/thumbnails.p', 'wb') as fp:
    pickle.dump(thumbnails, fp)