# Tessellation for TCGA
- Generates tessellations on the TCGA test set from training 1.

In [1]:
# !pip install patchify

In [2]:
from PIL import Image
import cv2
from PIL import Image
from patchify import patchify, unpatchify
from skimage import data
from skimage import color
from skimage import img_as_float

In [3]:
'Load metadata'
# Both metadata CSVs are read from <data_fld>/TCGA_data.
data_fld = '../data'
sub_fld = os.path.join(data_fld, 'TCGA_data')

# WSI metadata (tcga_wsi_meta.csv); the first CSV column becomes the index.
df_wsi = pd.read_csv(os.path.join(sub_fld, 'tcga_wsi_meta.csv'), index_col=0)

# Tile metadata (tcga_tile_meta.csv), read with pandas' default index.
path = os.path.join(sub_fld, 'tcga_tile_meta.csv')
df_tile = pd.read_csv(path)

In [4]:
'Load predictions post-inference'
# Predictions table stored next to the metadata; the first CSV column is
# used as the index.
path = os.path.join(sub_fld, '0107_pred_TCGA_2.csv')
df_tst = pd.read_csv(filepath_or_buffer=path, index_col=0)

# Cell output: preview of the first two rows.
df_tst.head(n=2)

Unnamed: 0,wsi_id,x_tile_coord,y_tile_coord,clinical_donor_id,wsi_name,clinical_sample_id,primary_tumor_type,CNA_data,ABSOLUTE_purity,rna_subtype,...,APC,pred_prob,CBT,CBTA,CBTP,CBT3,CBTPA,CBTP3,prediction,tile_entropy
0,TCGA-D3-A8GO-06Z-00-DX1.svs,4,8,TCGA-D3-A8GO,TCGA-D3-A8GO-06Z-00-DX1.357CD90F-23D3-45BB-BA1...,TCGA-D3-A8GO-06,,True,0.37,Common,...,0,[2.66175869e-23 1.18447545e-07 1.49605581e-18 ...,2.6617590000000005e-23,1.184475e-07,1.496056e-18,6.188183e-20,1.0,8.997108e-12,CBTPA,1e-06
1,TCGA-D9-A4Z5-01Z-00-DX1.svs,7,1,TCGA-D9-A4Z5,TCGA-D9-A4Z5-01Z-00-DX1.88AC8735-B520-4FCE-BC0...,TCGA-D9-A4Z5-01,ACRAL/NON-CUTANEOUS,True,0.93,Common,...,0,[9.2290703e-11 2.4995705e-01 2.4780464e-11 5.9...,9.22907e-11,0.249957,2.478046e-11,5.90947e-06,0.750037,4.095771e-08,Uncertain,0.313861


In [5]:
'Create output folder'
# Destination folder for the tessellated PNGs.
# NOTE(review): `home` is not defined in any cell visible in this notebook;
# it must already exist in the kernel namespace before this cell runs.
ds_fld = os.path.join(home, 'melanoma_images', '2021_1_12_tessellation_TCGA')

# makedirs also creates the intermediate 'melanoma_images' folder when it is
# missing, and exist_ok=True makes re-running this cell a no-op instead of
# relying on a separate (racy) isdir() check.
os.makedirs(ds_fld, exist_ok=True)

In [6]:
'Load helpers'
# Per-prediction HSV parameters handed to colorize(): hue comes from `colors`,
# saturation from `saturation`, value from `light`.
colors = {
    'CBT': 0.1,
    'CBT3': 0.2,
    'CBTP': 0.3,
    'CBTP3': 0.55,
    'CBTPA': 0.75,
    'CBTA': 0.95,
    'Uncertain': 0,
}

# Every label shares the same saturation / value except 'Uncertain', so both
# tables are derived from the keys of `colors` (same keys, same order).
saturation = {label: (0 if label == 'Uncertain' else 0.5) for label in colors}
light = {label: (0.5 if label == 'Uncertain' else 1) for label in colors}


def colorize(image, hue, saturation=1, light=0):
    """
        Overwrite the hue, saturation and value channels of an image.

        `image` is converted from RGB to HSV, each of the three HSV channels
        is replaced by the corresponding argument, and the result is converted
        back to RGB. For an (H, W, 3) input every channel is overwritten, so
        the pixel values of `image` do not influence the output.

        Note: inside this function the `saturation` and `light` parameters
        shadow the module-level dictionaries of the same names.

        Parameters
        ----------
        image : array passed to skimage.color.rgb2hsv
        hue : value written to HSV channel 0
        saturation : value written to HSV channel 1 (default 1)
        light : value written to HSV channel 2, i.e. the HSV "value" (default 0)

        Returns
        -------
        The array returned by skimage.color.hsv2rgb for the modified HSV data.
    """
    hsv_img = color.rgb2hsv(image)
    # HSV channel order is (hue, saturation, value); the three writes are
    # independent of one another.
    for channel, new_value in enumerate((hue, saturation, light)):
        hsv_img[:, :, channel] = new_value
    return color.hsv2rgb(hsv_img)

In [7]:
# Path to the whole slides in PNG format.
# Written as a plain literal on purpose: passing an absolute path as a later
# component of os.path.join() discards every component before it, so joining
# this onto another directory would have no effect.
# NOTE(review): this is a machine-specific absolute path; consider moving it
# to a configurable setting.
img_fld = '/home/tbiancal/melanoma_dataset/TCGA_WSI_PNG'

In [9]:
'Do tesselation'
# NOTE(review): `iterator` is never read in this cell (the loop below walks
# df_tile instead); it is kept only so the kernel namespace is unchanged.
iterator = df_tst.groupby(['wsi_id',]).count().index.to_frame().values

# Side length, in pixels, of one square patch cut from the WSI PNG.
TILE_PX = 82

issues = []     # [prediction, row, col, wsi_id] for each tile that could not be tinted
not_found = []  # [png_name] for each slide whose PNG could not be read

# NOTE(review): slides are enumerated from df_tile but their tiles are looked
# up in df_tst; a slide with no rows in df_tst is saved without any tint.
for wsi_name in tqdm(df_tile.wsi_id.unique()):
    # Locate the PNG rendering of this slide.
    png_name = wsi_name.split('.svs')[0] + '-WSI.png'
    img_path = os.path.join(img_fld, png_name)

    # cv2.imread signals an unreadable/missing file by returning None rather
    # than raising. The flag combination equals the literal 3 used before.
    wsi_img = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_ANYDEPTH)
    if wsi_img is None:
        not_found.append([png_name])
        continue

    # OpenCV decodes colour images as BGR, whereas skimage.color and
    # PIL.Image.fromarray both expect RGB. Convert once up front so the tint
    # is applied to, and the PNG is saved with, the correct channel order.
    wsi_img = cv2.cvtColor(wsi_img, cv2.COLOR_BGR2RGB)

    # Prediction rows belonging to this slide.
    mask = df_tst.wsi_id == wsi_name
    df_img = df_tst[mask]

    # Cut the slide into non-overlapping TILE_PX x TILE_PX patches.
    patches = patchify(wsi_img, (TILE_PX, TILE_PX, 3), step=TILE_PX)

    tile_iterator = zip(df_img['y_tile_coord'], df_img['x_tile_coord'], df_img['prediction'])
    for i, j, p in tile_iterator:
        try:
            # colorize() yields a per-pixel RGB factor for the predicted
            # class; multiplying tints the patch in place.
            col_factor = colorize(patches[i, j, 0], colors[p], saturation[p], light[p])
            patches[i, j, 0] = patches[i, j, 0] * col_factor
        except Exception:
            # Best effort: record the tile (e.g. unknown label or coordinate
            # outside the patch grid) and carry on. Unlike a bare `except`,
            # this lets KeyboardInterrupt / SystemExit stop the run.
            issues.append([p, i, j, wsi_name])

    # Reassemble the patches and save under the same file name in ds_fld.
    new_img = unpatchify(patches, wsi_img.shape)
    pic = Image.fromarray(new_img)
    out_path = os.path.join(ds_fld, os.path.basename(img_path))
    pic.save(out_path)

100%|██████████| 466/466 [01:58<00:00,  3.94it/s]


In [10]:
issues

[]

In [11]:
not_found

[]

In [13]:
# 'Test on genotype "Multiple"'
# mask = df_tst.genotype == 'Multiple'
# wsi_names = df_tst[mask].wsi_name.unique()

# issues = []
# not_found = []

# for wsi_name in wsi_names:
#     # load WSI_PNG
#     png_name = wsi_name.split('.svs')[0] + '-WSI.png'
#     img_path = os.path.join(img_fld, 'Multiple')
#     img_path = os.path.join(img_path, png_name)
#     assert os.path.isfile(img_path)
#     wsi_img = cv2.imread(img_path, 3)
#     if wsi_img is None:
#         not_found.append([genotype, png_name])
#     else:
#         # Get tile DF for WSI
#         mask_1 = df_tst.wsi_name == wsi_name
#         mask_2 = df_tst.genotype == 'Multiple'
#         mask = mask_1 & mask_2
#         df_wsi = df_tst[mask]

#         # Tessel patches
#         patches = patchify(wsi_img, (82, 82, 3), step=82)

#         tile_iterator = zip(df_wsi['y_tile_coord'], df_wsi['x_tile_coord'], df_wsi['prediction'])
#         for i, j, p in tile_iterator:
#             try:
#                 col_factor = colorize(patches[i, j, 0], colors[p], saturation[p], light[p])  
#                 patches[i, j, 0] = patches[i, j, 0] * col_factor
#             except:
#                 issues.append([p, i, j, wsi_name, genotype])

#         # Path to save the tassellations
#         new_img = unpatchify(patches, wsi_img.shape)
#         pic = Image.fromarray(new_img)
#         out_path = os.path.join(ds_fld, 'Multiple')
#         out_path = os.path.join(out_path, os.path.basename(img_path))
#         pic.save(out_path)