In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import glob
from PIL import Image
import tifffile
import pandas as pd
import json

In [None]:
# Load the training metadata and derive each slide's file location from its id.
train_df= pd.read_csv('Data/train.csv')
# Built as one vectorized string operation instead of a per-row .loc loop,
# which pays the full indexing overhead once per row.
train_df['image_path']= 'Data/train_images/' + train_df['id'].astype(str) + '.tiff'
train_df.head(3)

In [None]:
from sklearn.model_selection import KFold,StratifiedKFold

# Five stratified folds keyed on organ, shuffled with a fixed seed so the split is repeatable.
train_df['fold']= None
kf= StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
fold_splits= kf.split(train_df['image_path'], train_df['organ'])
for fold_id, (_train_rows, holdout_rows) in enumerate(fold_splits):
    # Each row is held out in exactly one fold; that fold number becomes its label.
    for row in holdout_rows:
        train_df.loc[row, 'fold']= fold_id

# NOTE(review): this writes to the working directory, while the other cells read
# 'Data/train.csv' (which therefore has no 'fold' column) — confirm that is intended.
train_df.to_csv('train.csv', index=False)
train_df.head(3)

In [None]:
# Quick look at how many training rows each organ contributes.
train_df.loc[:, 'organ'].hist()

In [None]:
# Re-read the raw metadata and collect the distinct (img_height, img_width) pairs.
train_df= pd.read_csv('Data/train.csv')
size_pairs= train_df[['img_height', 'img_width']].values.tolist()
# Lists are unhashable, so each pair becomes a tuple before de-duplicating through a set.
img_size= list({tuple(pair) for pair in size_pairs})
img_size

In [None]:
def mask2rle(img):
    '''
    Run-length encode a mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the runs as a space-separated "start length start length ..." string.
    Starts are 1-based positions in the flattened transpose of img.
    '''
    flat = img.T.flatten()
    # A zero sentinel on each side makes runs that touch either end produce a transition too.
    padded = np.concatenate([[0], flat, [0]])
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Even slots hold run starts, odd slots hold run ends; convert the ends into lengths.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

def rle_decode(mask_rle, shape, color=1):
    '''
    Paint a run-length string onto an empty mask.

    mask_rle: run-length as string formatted (start length), starts are 1-based
    shape: 2-tuple; the flat buffer is reshaped to `shape` and then transposed,
           so the returned array has shape (shape[1], shape[0])
    color: value written into the mask pixels
    Returns numpy float32 array, color - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Tokens alternate start, length; shift the starts to 0-based offsets.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    canvas = np.zeros(shape[0] * shape[1], dtype=np.float32)
    for begin, run in zip(starts, lengths):
        canvas[begin : begin + run] = color
    return canvas.reshape(shape).T


# for i in range(3):
#     name= train_df.loc[i, 'id']
#     path= f'Data/train_images/{name}.tiff'
#     img= np.array(Image.open(path))
#     rle= train_df.loc[i, 'rle']
#     mask= rle_decode(rle, img.shape[:2])
    
#     plt.imshow(img)
#     plt.show()
#     plt.imshow(mask)
#     plt.show()
#     mix= (img[..., 0] + mask*255)/2
#     plt.imshow(mix.astype(np.uint8))
#     plt.show()

# EX_Data

In [None]:
from tifffile import imread
import cv2
from tqdm.auto import tqdm

def rle_decode(mask_rle, shape=(1600,256)):
    '''
    Expand a run-length string into a binary mask.

    mask_rle: run-length as string formatted (start length), starts are 1-based
    shape: (width,height); the result is transposed, so its shape is (height,width)
    Returns numpy uint8 array, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Tokens alternate start, length; shift the starts to 0-based offsets.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    stops = starts + np.asarray(tokens[1::2], dtype=int)
    flat_mask = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for begin, end in zip(starts, stops):
        flat_mask[begin:end] = 1
    return flat_mask.reshape(shape).T


# Build the metadata table for the external PNG image/mask pairs.
#
# Both listings are sorted: glob returns entries in arbitrary filesystem order, so
# pairing the i-th image with the i-th mask is only meaningful once both are ordered.
# NOTE(review): this assumes the image and mask trees use matching relative file
# names — confirm against the data layout.
img_path= sorted(glob.glob('Data/train_images_ex/**/*png', recursive=True))
mask_path= sorted(glob.glob('Data/train_mask_ex/**/*png', recursive=True))
if len(img_path) != len(mask_path):
    raise ValueError(f'found {len(img_path)} images but {len(mask_path)} masks')
train_df= pd.read_csv('Data/train.csv')

# Collect one record per pair. The constant columns are written on every record:
# assigning a scalar to a column of an *empty* frame creates no values, so rows
# added afterwards would end up with NaN in 'data_source' and 'fold'.
records= []
for path, mask_file in tqdm(zip(img_path, mask_path), total=len(mask_path)):
    with Image.open(mask_file) as mask_img:
        mask= np.array(mask_img)
    records.append({
        'data_source': 'Hubmap',
        'fold': -1,
        'rle': mask2rle(mask),
        'image_path': path,
    })

# Keep the train.csv column layout and append any column it does not already have.
columns= list(train_df.columns)
for extra in ('data_source', 'fold', 'rle', 'image_path'):
    if extra not in columns:
        columns.append(extra)

df= pd.DataFrame(records, columns=columns)
df.to_csv('Data/train_ex.csv', index= False)
df

In [None]:
from tifffile import imread
from tqdm import tqdm

def rle_decode(mask_rle, shape=(1600,256)):
    '''
    Turn a run-length string back into a binary mask.

    mask_rle: run-length as string formatted (start length), starts are 1-based
    shape: (width,height); the result is transposed, so its shape is (height,width)
    Returns numpy uint8 array, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Even-positioned tokens are starts (made 0-based here), odd-positioned ones are lengths.
    first = np.asarray(tokens[0::2], dtype=int) - 1
    last = first + np.asarray(tokens[1::2], dtype=int)
    buffer = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(first, last):
        buffer[lo:hi] = 1
    return buffer.reshape(shape).T

def rle_encode(img):
    '''
    Run-length encode a mask.

    img: numpy array, non-zero - mask, 0 - background
    Returns the runs as a space-separated "start length ..." string, with 1-based
    starts counted over the flattened transpose of img.
    '''
    pixels = img.T.flatten()
    # Pad with a zero on each side instead of overwriting the first and last pixel:
    # overwriting silently dropped mask pixels that sit in those two corners.
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Even slots hold run starts, odd slots hold run ends; convert the ends into lengths.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)


# Decode every RLE in the external "full" set into a 0/255 PNG mask and record
# where the image and its mask live.
df= pd.read_csv('Data/train_images_ex_full/train.csv').rename(columns= {'encoding': 'rle'})
df['image_path']= None
df['mask_path']= None

for i in tqdm(range(len(df))):
    id_= df.loc[i, 'id']
    tiff_file= f'Data/train_images_ex_full/{id_}.tiff'
    png_file= f'Data/train_mask_ex_full/{id_}.png'
    df.loc[i, 'image_path']= tiff_file
    # The slide is read only for its dimensions; rle_decode wants (width, height),
    # hence the reversal of the leading two axes.
    height_width= imread(tiff_file).shape[:2]
    mask= rle_decode(df.loc[i, 'rle'], shape=height_width[::-1])
    mask*= 255
    Image.fromarray(mask.astype(np.uint8)).save(png_file)
    df.loc[i, 'mask_path']= png_file
# The masks now live on disk, so the encoded column is no longer needed.
df= df.drop(columns=['rle'])
df.to_csv('Data/train_ex_full.csv', index= False)

In [None]:
from tifffile import imread
from tqdm import tqdm
import cv2

# Pair every external tiff image with the mask path derived from it.
imgs= glob.glob('Data/train_ex_1/train_img/**/*tiff', recursive=True)
# The mask path mirrors the image path: same tree under 'train_mask', stored as png.
mask_files= [
    img_file.replace('train_img', 'train_mask').replace('tiff', 'png')
    for img_file in imgs
]
# Built in one go rather than enlarging the frame one row at a time through .loc.
df= pd.DataFrame({'image_path': imgs, 'mask_path': mask_files}, columns=['image_path', 'mask_path'])
df.to_csv('Data/ex_data.csv', index=False)

In [None]:
# Re-save every tiff mask as a png next to it, printing each shape along the way.
imgs= glob.glob('Data/train_ex_1/train_mask/**/*tiff', recursive=True)
for tiff_mask in imgs:
    mask_pixels= imread(tiff_mask)
    print(mask_pixels.shape)
    Image.fromarray(mask_pixels).save(tiff_mask.replace('tiff', 'png'))

# check tiff read issue

In [None]:
from tifffile import imread

# Read each kidney slide and print the shape it ends up with after normalising the layout.
df= pd.read_csv('Data/ex_data_kidney.csv')
for path in df['image_path']:
    img= np.squeeze(imread(path))
    # A leading axis of length 3 is treated as channels and moved to the end.
    if img.shape[0]==3:
        img= img.transpose(1, 2, 0)
    print(img.shape)
    # Drop the array before the next read.
    del img

# stain normalization

In [None]:
import staintools
from tqdm.auto import tqdm

# Stain-normalize every training slide towards one reference test slide.
target = staintools.read_image('Data/test_images/10078.tiff')
# Standardize brightness (optional, can improve the tissue mask calculation)
target = staintools.LuminosityStandardizer.standardize(target)

# The reference never changes, so it is standardized and the normalizer fitted once,
# before the loop, instead of repeating both steps for every source slide.
normalizer = staintools.StainNormalizer(method='vahadane')
normalizer.fit(target)

source_img= glob.glob('Data/train_images/**/*tiff', recursive=True)

for path in tqdm(source_img[:]):
    to_transform = staintools.read_image(path)
    to_transform = staintools.LuminosityStandardizer.standardize(to_transform)

    # Stain normalize
    transformed1 = normalizer.transform(to_transform)

    # basename works with whichever separator the platform uses; splitting on a
    # backslash only isolated the file name on Windows.
    name= os.path.basename(path)
    tifffile.imwrite(f'../Data/{name}', transformed1)

In [None]:
import os
import re
import glob
import cv2
import numpy as np
import pandas as pd
from ipywidgets import interact
from PIL import Image
from math import ceil
from bs4 import BeautifulSoup
import requests
from io import StringIO, BytesIO
from tqdm.auto import tqdm

import torch
import matplotlib.pyplot as plt
import zipfile

# GTEx Portal export: one row per tissue sample, used below to pick slides to download.
df = pd.read_csv(filepath_or_buffer='Data/GTEx Portal.csv')
df.head(5)

In [None]:
# organ_names = ['Kidney','Prostate','Colon', 'Spleen','Lung']
# df_sub = df.loc[df['Tissue'].str.contains('Kidney|Prostate|Colon|Spleen|Lung')]
debug = False
organ_name = 'Prostate'
# Keep only the samples whose Tissue label mentions the chosen organ.
is_chosen_organ = df['Tissue'].str.contains(organ_name)
df_organ = df[is_chosen_organ]
print(df_organ.shape)
df_organ.head()

In [None]:
# Samples are fetched in pages of `count` rows; `index` selects which page.
index = 6
count = 10
start_index, end_index = count*index, count*(index+1)
df_organ_sub = df_organ.iloc[start_index:end_index]
df_organ_sub.shape

In [None]:
def download_tiff(tissue_sample_id, img_out, timeout=60):
    """Download one slide and store it in an open zip archive.

    tissue_sample_id: sample id appended to the image-download URL; also used as
        the archive member name (``<id>.tiff``).
    img_out: open, writable ``zipfile.ZipFile`` that receives the downloaded bytes.
    timeout: seconds to wait on the server before giving up (default 60).

    Raises whatever ``requests`` raises on a connection failure, a timeout, or an
    HTTP error status. Nothing is written to the archive in those cases.
    """
    url = f"https://brd.nci.nih.gov/brd/imagedownload/{tissue_sample_id}"
    # Without a timeout a stalled connection would hang the notebook indefinitely.
    r = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than archiving an error page as a .tiff.
    r.raise_for_status()
    img_out.writestr(f'{tissue_sample_id}.tiff', r.content)
    
def map_organ(tissue):
    """Translate a tissue label into the organ name used by this notebook.

    The keywords are tried in a fixed order and the first one contained in
    `tissue` wins. When none matches, the label is printed and '' is returned.
    """
    keyword_to_organ = (
        ('Kidney', 'kidney'),
        ('Colon', 'largeintestine'),
        ('Spleen', 'spleen'),
        ('Prostate', 'prostate'),
        ('Lung', 'lung'),
    )
    for keyword, organ in keyword_to_organ:
        if keyword in tissue:
            return organ
    print(f'tissue is {tissue}')
    return ''

import matplotlib.pyplot as plt
def show_image(img,mask=None):
    """Draw `img` on a new 10x10 figure, optionally with `mask` blended on top.

    img: image passed to plt.imshow
    mask: optional overlay, drawn half-transparent with the 'coolwarm' colormap
    """
    # Layers are drawn bottom-up: the image first, then the overlay if one was given.
    layers = [(img, {})]
    if mask is not None:
        layers.append((mask, {'cmap': 'coolwarm', 'alpha': 0.5}))

    plt.figure(figsize=(10,10))
    for pixels, options in layers:
        plt.imshow(pixels, **options)
    plt.axis("off")
    

def show_images(img_list, rows=5, cols=3):
    """Plot the images on a rows x cols grid, each titled with its zero-padded position.

    img_list: iterable of images accepted by Axes.imshow
    rows, cols: grid dimensions of the 20x20 figure
    """
    fig, grid = plt.subplots(nrows=rows, ncols=cols, figsize=(20, 20))
    # Work on a 1-d view of the grid so a single running position addresses every panel.
    panels = grid.flatten()
    for position, picture in enumerate(img_list):
        panel = panels[position]
        panel.imshow(picture)
        panel.set(title=f'{position:04d}')

In [None]:
# Download every slide of the selected page into one zip archive.
images_out = f'Data/hubmap_external/hubmap_organ_{organ_name}_{start_index}_{end_index}.zip'
# Create the target folder on first use so opening the archive cannot fail on a missing directory.
os.makedirs(os.path.dirname(images_out), exist_ok=True)
with zipfile.ZipFile(images_out, 'w') as img_out:
    # The loop deliberately leaves the notebook-level names `index` and `organ_name`
    # alone: both feed the archive name above, so overwriting them here changed the
    # file name whenever this cell was run again. The other per-row fields
    # (tissue, sex, age bracket) were read but never used and are not extracted.
    for row_label, sample in tqdm(df_organ_sub.iterrows(), total=df_organ_sub.shape[0]):
        download_tiff(sample['Tissue Sample ID'], img_out)

# Slice Image

In [None]:
from tifffile import imread
import glob
import os
from tqdm.auto import tqdm
import image_slicer
from PIL import Image

# Cut every large-intestine mask into 4 tiles, numbering the tiles consecutively
# across all masks.
# NOTE(review): the numbering follows glob's listing order, which is not sorted —
# confirm the matching image tiles are produced in the same order.
imgs= glob.glob('Data/train_ex_largeintestine/train_mask_png/**png')
t= 0
for path in tqdm(imgs[:]):
    for tile in image_slicer.slice(path, 4, save=False):
        t+= 1
        # Go through a numpy array, as before, rather than saving tile.image directly.
        tile_pixels= np.array(tile.image)
        Image.fromarray(tile_pixels).save(f'Data/train_ex_largeintestine/train_mask_tiles/{t}.png')

In [None]:
import pandas as pd

def rle_encode(img):
    '''
    Run-length encode a mask.

    img: numpy array, non-zero - mask, 0 - background
    Returns the runs as a space-separated "start length ..." string, with 1-based
    starts counted over the flattened transpose of img.
    '''
    pixels = img.T.flatten()
    # Pad with a zero on each side instead of overwriting the first and last pixel:
    # overwriting silently dropped mask pixels that sit in those two corners.
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Even slots hold run starts, odd slots hold run ends; convert the ends into lengths.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

# One row per large-intestine image tile: its path and the RLE of the matching mask tile.
imgs= glob.glob('Data/train_ex_largeintestine/train_img_tiles/**png')
records= []
for path in imgs:
    # The mask tile has the same file name under the sibling 'train_mask_tiles' folder.
    with Image.open(path.replace('train_img_tiles', 'train_mask_tiles')) as mask_img:
        mask= np.array(mask_img)
    # Scale the stored 0/255 mask down to 0/1 before encoding.
    mask= (mask/255).astype(np.uint8)
    records.append({'organ': 'largeintestine', 'image_path': path, 'rle': rle_encode(mask)})
# Built from the collected records in one go; enlarging the frame row by row
# through .loc re-allocates it on every insert.
df= pd.DataFrame(records, columns=['organ', 'image_path', 'rle'])
df

# Slice Image kidney

In [None]:
from tifffile import imread
import glob
import os
from tqdm.auto import tqdm
import image_slicer
import numpy as np
from PIL import Image
# Raise Pillow's pixel-count limit so the very large kidney slides can be opened.
Image.MAX_IMAGE_PIXELS = 933120000000

# Cut every kidney slide into a grid of roughly 3000-pixel tiles, numbering the
# tiles consecutively across all slides.
# NOTE(review): the numbering follows glob's listing order, which is not sorted —
# confirm the matching mask tiles are produced in the same order.
imgs= glob.glob('Data/train_ex_kidney/img_png/**png')
t= 0
for path in tqdm(imgs[:]):
    # Only the dimensions are needed here; the context manager closes the file
    # handle straight away instead of leaving one open per slide.
    with Image.open(path) as slide:
        width, height= slide.size
    # Clamp to at least one column/row: a side shorter than 3000 px would
    # otherwise give a grid dimension of 0.
    # NOTE(review): a slide under 6000 px on both sides yields a 1x1 grid — check
    # how image_slicer handles that case.
    col= max(1, width // 3000)
    row= max(1, height // 3000)
    tiles = image_slicer.slice(path, col=col, row=row, save=False)
    for tile in tiles:
        img= np.array(tile.image)
        im= Image.fromarray(img)
        im.save(f'Data/train_ex_kidney/img_tiles/{t+1}.png')
        t+= 1

In [None]:
import pandas as pd
from tifffile import imread
import glob
import os
from tqdm.auto import tqdm
import image_slicer
import numpy as np
from PIL import Image

def rle_encode(img):
    '''
    Run-length encode a mask.

    img: numpy array, non-zero - mask, 0 - background
    Returns the runs as a space-separated "start length ..." string, with 1-based
    starts counted over the flattened transpose of img.
    '''
    pixels = img.T.flatten()
    # Pad with a zero on each side instead of overwriting the first and last pixel:
    # overwriting silently dropped mask pixels that sit in those two corners.
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Even slots hold run starts, odd slots hold run ends; convert the ends into lengths.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

# One row per kidney image tile: its path and the RLE of the matching mask tile.
imgs= glob.glob('Data/train_ex_kidney/img_tiles/**png')
records= []
for path in tqdm(imgs):
    # The mask tile has the same file name under the sibling 'mask_tiles' folder.
    with Image.open(path.replace('img_tiles', 'mask_tiles')) as mask_img:
        mask= np.array(mask_img)
    # Scale the stored 0/255 mask down to 0/1 before encoding.
    mask= (mask/255).astype(np.uint8)
    records.append({'organ': 'kidney', 'image_path': path, 'rle': rle_encode(mask)})
# Built from the collected records in one go; enlarging the frame row by row
# through .loc re-allocates it on every insert.
df= pd.DataFrame(records, columns=['organ', 'image_path', 'rle'])
df