In [6]:
import os
import numpy as np
import matplotlib.pyplot as plt
import glob
from PIL import Image
import tifffile
import pandas as pd
import json

In [None]:
# Load the training metadata and derive the on-disk TIFF path of every sample
# from its 'id' value.
train_df = pd.read_csv('Data/train.csv')
train_df['image_path'] = [
    f'Data/train_images/{sample_id}.tiff' for sample_id in train_df['id']
]
train_df.head(3)

In [None]:
from sklearn.model_selection import KFold,StratifiedKFold

# Tag every row with the index (0-4) of the stratified fold in which it is
# held out; stratification is done on the 'organ' column.
train_df['fold'] = None
kf = StratifiedKFold(n_splits=5, random_state=123, shuffle=True)
fold_splits = kf.split(train_df['image_path'], train_df['organ'])
for fold_id, (_, held_out) in enumerate(fold_splits):
    # held_out holds the row positions of this fold's validation samples.
    train_df.loc[held_out, 'fold'] = fold_id

train_df.to_csv('train.csv', index=False)
train_df.head(3)

In [None]:
# Histogram of the values in the 'organ' column of the training metadata.
train_df['organ'].hist()

In [None]:
# Collect the distinct (img_height, img_width) pairs present in the metadata.
train_df = pd.read_csv('Data/train.csv')
size_rows = train_df[['img_height', 'img_width']].values.tolist()
img_size = list(set(map(tuple, size_rows)))
img_size

In [3]:
def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, non-zero/1 - mask, 0 - background
    Returns the encoding as a space-separated string of "start length" pairs,
    with 1-based starts counted over the transposed (column-major) pixel order.
    '''
    flat = img.T.flatten()
    # Zero sentinels on both ends so runs touching either border are closed.
    padded = np.concatenate([[0], flat, [0]])
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Even slots are run starts, odd slots are run ends: turn ends into lengths.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))

def rle_decode(mask_rle, shape, color=1):
    '''
    Decode a run-length string into a float32 mask.

    mask_rle: run-length as string formatted (start length), 1-based starts
    shape: 2-tuple; the flat buffer is reshaped to `shape` and then transposed,
           so the returned array has shape (shape[1], shape[0])
    color: value written into the mask pixels
    Returns numpy array, color - mask, 0 - background
    '''
    tokens = mask_rle.split()
    starts = np.asarray(tokens[0::2], dtype=int) - 1  # to 0-based offsets
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths
    canvas = np.zeros(shape[0] * shape[1], dtype=np.float32)
    for begin, stop in zip(starts, ends):
        canvas[begin:stop] = color
    return canvas.reshape(shape).T


# for i in range(3):
#     name= train_df.loc[i, 'id']
#     path= f'Data/train_images/{name}.tiff'
#     img= np.array(Image.open(path))
#     rle= train_df.loc[i, 'rle']
#     mask= rle_decode(rle, img.shape[:2])
    
#     plt.imshow(img)
#     plt.show()
#     plt.imshow(mask)
#     plt.show()
#     mix= (img[..., 0] + mask*255)/2
#     plt.imshow(mix.astype(np.uint8))
#     plt.show()

# EX_Data

In [None]:
from tifffile import imread
import cv2
from tqdm.auto import tqdm

def rle_decode(mask_rle, shape=(1600,256)):
    '''
    Decode a run-length string into a uint8 binary mask.

    mask_rle: run-length as string formatted (start length), 1-based starts
    shape: (width,height) of the mask; the result is transposed, so the
           returned array has shape (height, width)
    Returns numpy array, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    starts = np.asarray(tokens[0::2], dtype=int) - 1  # to 0-based offsets
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths
    canvas = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(starts, ends):
        canvas[begin:stop] = 1
    return canvas.reshape(shape).T


# Build Data/train_ex.csv: one row per external image, holding its path and the
# run-length encoding of its PNG mask.
#
# glob returns files in an unspecified order, so both listings are sorted
# before being paired by position.
# NOTE(review): this assumes images and masks share relative file names under
# their respective roots, so that sorted order lines them up - confirm.
img_path = sorted(glob.glob('Data/train_images_ex/**/*png', recursive=True))
mask_path = sorted(glob.glob('Data/train_mask_ex/**/*png', recursive=True))
if len(img_path) != len(mask_path):
    raise ValueError(
        f'Found {len(img_path)} images but {len(mask_path)} masks; '
        'cannot pair them by position.'
    )
train_df = pd.read_csv('Data/train.csv')

# Collect plain records and create the frame once. Assigning a scalar to a
# column of an empty DataFrame stores nothing, so the constant columns
# ('data_source', 'fold') are written into every record explicitly.
records = []
for path, m_path in tqdm(zip(img_path, mask_path), total=len(mask_path)):
    mask = np.array(Image.open(m_path))
    records.append({
        'image_path': path,
        'rle': mask2rle(mask),
        'data_source': 'Hubmap',
        'fold': -1,
    })

# Keep the column layout of train.csv, followed by the extra columns.
columns = list(train_df.columns)
for extra in ('data_source', 'fold', 'rle', 'image_path'):
    if extra not in columns:
        columns.append(extra)

df = pd.DataFrame(records).reindex(columns=columns)
df.to_csv('Data/train_ex.csv', index= False)
df

In [28]:
from tifffile import imread
from tqdm import tqdm

def rle_decode(mask_rle, shape=(1600,256)):
    '''
    Turn a "start length ..." run-length string into a uint8 binary mask.

    mask_rle: run-length as string formatted (start length), 1-based starts
    shape: (width,height) of the mask; the flat buffer is reshaped to this and
           transposed, so the returned array has shape (height, width)
    Returns numpy array, 1 - mask, 0 - background
    '''
    fields = mask_rle.split()
    run_starts = np.asarray(fields[0::2], dtype=int)
    run_lengths = np.asarray(fields[1::2], dtype=int)
    run_starts -= 1  # RLE positions are 1-based
    run_ends = run_starts + run_lengths
    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for first, last in zip(run_starts, run_ends):
        flat_mask[first:last] = 1
    return flat_mask.reshape(shape).T

def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array (or array-like), non-zero/1 - mask, 0 - background
    Returns the encoding as a space-separated string of "start length" pairs,
    with 1-based starts counted over the transposed (column-major) pixel order.

    The flattened pixels are padded with a zero on each side instead of
    overwriting the first and last pixel, so mask pixels at either end of the
    image are kept and an empty image encodes to an empty string.
    '''
    # The mask is transposed so that runs are counted down the columns.
    pixels = np.asarray(img).T.flatten()
    padded = np.concatenate([[0], pixels, [0]])
    # Index k of the comparison marks a change at 1-based pixel position k + 1.
    runs = np.where(padded[1:] != padded[:-1])[0] + 1
    # Even slots are run starts, odd slots are run ends: turn ends into lengths.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)


# Decode every RLE annotation of the full-size external set into a 0/255 PNG
# mask, then save a CSV that maps each image path to its mask path.
df = pd.read_csv('Data/train_images_ex_full/train.csv').rename(columns={'encoding': 'rle'})
df['image_path'] = None
df['mask_path'] = None

for i in tqdm(range(len(df))):
    id_ = df.loc[i, 'id']
    tiff_file = f'Data/train_images_ex_full/{id_}.tiff'
    png_file = f'Data/train_mask_ex_full/{id_}.png'

    df.loc[i, 'image_path'] = tiff_file
    img = imread(tiff_file)
    # rle_decode takes (width, height), i.e. the reversed leading image dims.
    width_height = img.shape[:2][::-1]
    mask = rle_decode(df.loc[i, 'rle'], shape=width_height)
    mask *= 255  # 0/1 -> 0/255 so the PNG is directly viewable

    Image.fromarray(mask.astype(np.uint8)).save(png_file)
    df.loc[i, 'mask_path'] = png_file

# The masks now live on disk, so the RLE strings are dropped before saving.
df = df.drop(columns=['rle'])
df.to_csv('Data/train_ex_full.csv', index= False)

100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [03:51<00:00, 15.43s/it]


In [8]:
from tifffile import imread
from tqdm import tqdm
import cv2

# Pair every external TIFF image with the PNG mask path obtained by swapping
# 'train_img' -> 'train_mask' and 'tiff' -> 'png' in its path, and save the table.
imgs = glob.glob('Data/train_ex_1/train_img/**/*tiff', recursive=True)
mask_files = [
    tiff_file.replace('train_img', 'train_mask').replace('tiff', 'png')
    for tiff_file in imgs
]
df = pd.DataFrame(
    {'image_path': imgs, 'mask_path': mask_files},
    columns=['image_path', 'mask_path'],
)
df.to_csv('Data/ex_data.csv', index=False)

In [10]:
# Re-save every TIFF mask as a PNG with 'tiff' replaced by 'png' in its path,
# printing each mask's shape along the way.
imgs = glob.glob('Data/train_ex_1/train_mask/**/*tiff', recursive=True)
for tiff_file in imgs:
    mask_arr = imread(tiff_file)
    print(mask_arr.shape)
    png_file = tiff_file.replace('tiff', 'png')
    Image.fromarray(mask_arr).save(png_file)

Shaped series: axes do not match shape


(4536, 4704)


Shaped series: axes do not match shape


(4536, 4704)


Shaped series: axes do not match shape


(4536, 4704)


Shaped series: axes do not match shape


(4536, 4704)


Shaped series: axes do not match shape


(4536, 4704)


Shaped series: axes do not match shape


(4536, 4704)


# check tiff read issue

In [5]:
from tifffile import imread

# Read each listed TIFF, drop singleton axes, move a leading 3-element axis to
# the end, and print the resulting shape.
df = pd.read_csv('Data/ex_data_kidney.csv')
for tiff_file in df['image_path']:
    img = np.squeeze(imread(tiff_file))
    if img.shape[0] == 3:
        img = img.transpose(1, 2, 0)
    print(img.shape)
    # Release the array before the next file is loaded.
    del img

(31278, 25794, 3)
(31299, 44066, 3)
(18484, 13013, 3)


KeyboardInterrupt: 

# stain normalization

In [21]:
import staintools
from tqdm.auto import tqdm

# Stain-normalize every training image towards one reference test image
# (Vahadane method) and write the result under ../Data/ with the same file name.
target = staintools.read_image('Data/test_images/10078.tiff')
source_img= glob.glob('Data/train_images/**/*tiff', recursive=True)

# The reference image never changes, so it is standardized and the normalizer
# is fitted once, instead of repeating both steps for every source image.
# Standardize brightness (optional, can improve the tissue mask calculation)
target = staintools.LuminosityStandardizer.standardize(target)
normalizer = staintools.StainNormalizer(method='vahadane')
normalizer.fit(target)

for path in tqdm(source_img[:]):
    to_transform = staintools.read_image(path)
    to_transform = staintools.LuminosityStandardizer.standardize(to_transform)

    # Stain normalize
    transformed1 = normalizer.transform(to_transform)

    # os.path.basename uses the platform's path separators, unlike a
    # hard-coded split on '\\', which only works for Windows-style paths.
    name = os.path.basename(path)
    tifffile.imwrite(f'../Data/{name}', transformed1)

  0%|          | 0/351 [00:00<?, ?it/s]

  return A / np.linalg.norm(A, axis=1)[:, None]
  source_concentrations *= (self.maxC_target / maxC_source)
  source_concentrations *= (self.maxC_target / maxC_source)
