In [1]:
import os
import gc
import cv2
import glob
import pandas as pd
import numpy as np 

## Utils

In [2]:
def get_metadata(row):
    """Parse the case, day and slice numbers out of ``row['id']``.

    The id is split on underscores. The first token supplies the case number
    (after removing the text ``case``), the second token the day number
    (after removing ``day``) and the last token the slice number. The three
    integers are written to ``row`` under ``'case'``, ``'day'`` and
    ``'slice'`` (in that order) and the same ``row`` object is returned.
    """
    tokens = row['id'].split('_')
    parsed = {
        'case': int(tokens[0].replace('case', '')),
        'day': int(tokens[1].replace('day', '')),
        'slice': int(tokens[-1]),
    }
    # Everything is parsed before the row is touched, so a malformed id
    # leaves the row unmodified.
    for key, value in parsed.items():
        row[key] = value
    return row

def path2info(row):
    """Extract image size and scan identifiers from ``row['image_path']``.

    The path is interpreted as ``.../case<C>_day<D>/<dir>/slice_<S>_<W>_<H>_...``:

    * the third-from-last path component is split on ``_`` to obtain the
      case number (token 0, text ``case`` removed) and the day number
      (token 1, text ``day`` removed);
    * the file name is split on ``_``; token 1 is the slice number, token 2
      the width and token 3 the height.

    Both ``\\`` and ``/`` are accepted as path separators, so the function
    works on Windows-style and POSIX-style paths alike.

    The integers are stored on ``row`` under ``'height'``, ``'width'``,
    ``'case'``, ``'day'`` and ``'slice'`` (in that order) and the same
    ``row`` object is returned.
    """
    path = row['image_path']
    # Normalise separators first so one split handles either path style.
    parts = path.replace('/', '\\').split('\\')
    file_tokens = parts[-1].split('_')
    scan_tokens = parts[-3].split('_')

    slice_ = int(file_tokens[1])
    case = int(scan_tokens[0].replace('case', ''))
    day = int(scan_tokens[1].replace('day', ''))
    width = int(file_tokens[2])
    height = int(file_tokens[3])

    row['height'] = height
    row['width'] = width
    row['case'] = case
    row['day'] = day
    row['slice'] = slice_
    return row

def load_img(path, size=[320, 384]):
    """Read an image, rescale it to 8 bit and zero-pad it to ``size``.

    Parameters
    ----------
    path : str
        File handed to ``cv2.imread`` with ``cv2.IMREAD_UNCHANGED``.
    size : sequence of two ints, optional
        Target ``(rows, cols)`` of the returned array. It is only read,
        never modified.

    Returns
    -------
    img : np.ndarray
        ``uint8`` array of shape ``size``. Pixel values are min-max
        normalised to the range 0..255 and the original content is centred
        inside zero padding; when the padding is odd, the extra row/column
        goes to the bottom/right.
    shape0 : np.ndarray
        The first two dimensions of the image before padding.

    Raises
    ------
    OSError
        If ``cv2.imread`` returns ``None`` (file missing or not decodable).
    ValueError
        If the image is larger than ``size`` along either axis; this
        function only pads, it never crops or rescales.
    """
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f'could not read image (missing or undecodable): {path!r}')
    img = cv2.normalize(img, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
    img = img.astype(np.uint8)
    shape0 = np.array(img.shape[:2])
    target = np.array(size)
    if np.any(shape0 != target):
        diff = target - shape0
        if np.any(diff < 0):
            raise ValueError(
                f'image of shape {shape0.tolist()} does not fit into '
                f'target size {target.tolist()}: {path!r}'
            )
        # Split the padding evenly; an odd remainder is added after the image.
        pad_rows = [diff[0] // 2, diff[0] // 2 + diff[0] % 2]
        pad_cols = [diff[1] // 2, diff[1] // 2 + diff[1] % 2]
        img = np.pad(img, [pad_rows, pad_cols])
    return img, shape0

def load_imgs(img_paths, size=[320, 384]):
    """Load several images and stack them along a trailing axis.

    Parameters
    ----------
    img_paths : sequence of str
        Paths passed one by one to ``load_img``.
    size : sequence of two ints, optional
        Target ``(rows, cols)`` forwarded to ``load_img``.

    Returns
    -------
    imgs : np.ndarray
        ``uint8`` array of shape ``(*size, len(img_paths))``; ``imgs[..., i]``
        holds the image loaded from ``img_paths[i]``.
    shape0 : np.ndarray or None
        Pre-padding shape reported by ``load_img`` for the FIRST path, or
        ``None`` when ``img_paths`` is empty.
    """
    imgs = np.zeros((*size, len(img_paths)), dtype=np.uint8)
    # Defined up front so an empty path list returns cleanly instead of
    # failing on an unbound name.
    shape0 = None
    for i, img_path in enumerate(img_paths):
        img, img_shape = load_img(img_path, size=size)
        if i == 0:
            shape0 = img_shape
        imgs[..., i] = img
    return imgs, shape0

## Data

In [3]:
# Absolute Windows path to train.csv; edit this to match the local dataset location.
DATA_PATH = 'D:\\UW_Madison_GI_Tract_Image_Segmentation\\train.csv'

In [4]:
# Load the annotation table from the CSV file named by DATA_PATH.
sub_df = pd.read_csv(DATA_PATH)

In [5]:
# Show the raw table; as the output below shows, each id appears once per class.
sub_df

Unnamed: 0,id,class,segmentation
0,case123_day20_slice_0001,large_bowel,
1,case123_day20_slice_0001,small_bowel,
2,case123_day20_slice_0001,stomach,
3,case123_day20_slice_0002,large_bowel,
4,case123_day20_slice_0002,small_bowel,
...,...,...,...
115483,case30_day0_slice_0143,small_bowel,
115484,case30_day0_slice_0143,stomach,
115485,case30_day0_slice_0144,large_bowel,
115486,case30_day0_slice_0144,small_bowel,


In [6]:
# Keep a single row per id, then derive the integer case/day/slice columns from it.
sub_df = (
    sub_df
    .drop(columns=['class', 'segmentation'])
    .drop_duplicates()
    .apply(get_metadata, axis=1)
)

In [7]:
# List every entry four directory levels below train\ and parse each path
# into height/width/case/day/slice columns.
paths = glob.glob(
    'D:\\UW_Madison_GI_Tract_Image_Segmentation\\train\\*\\*\\*\\*',
    recursive=True,
)
path_df = pd.DataFrame(paths, columns=['image_path']).apply(path2info, axis=1)
path_df.head()

Unnamed: 0,image_path,height,width,case,day,slice
0,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,101,20,1
1,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,101,20,2
2,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,101,20,3
3,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,101,20,4
4,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,101,20,5


In [8]:
# Attach the image path and its dimensions to every slice id; the left join
# keeps ids even when no matching file was found.
test_df = pd.merge(sub_df, path_df, how='left', on=['case', 'day', 'slice'])
test_df.head()

Unnamed: 0,id,case,day,slice,image_path,height,width
0,case123_day20_slice_0001,123,20,1,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266
1,case123_day20_slice_0002,123,20,2,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266
2,case123_day20_slice_0003,123,20,3,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266
3,case123_day20_slice_0004,123,20,4,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266
4,case123_day20_slice_0005,123,20,5,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266


### Create 2.5D

In [9]:
# Build the 2.5D input: for every slice, collect the paths of `channels`
# slices from the same (case, day) scan, spaced `stride` rows apart.
# NOTE: this relies on test_df rows being ordered by slice within each scan.
channels = 3
stride = 2
scan_keys = [test_df['case'], test_df['day']]
# Last available path of each scan; used when a scan has fewer rows than the
# shift, so no path is ever borrowed from a neighbouring scan.
last_in_scan = test_df.groupby(['case', 'day'])['image_path'].transform('last')
for i in range(channels):
    shifted = test_df.groupby(['case', 'day'])['image_path'].shift(-i * stride)
    # Rows shifted past the end of a scan repeat that scan's last slice.
    # The forward fill is done per scan (not across the whole column) and
    # avoids the deprecated fillna(method='ffill') form.
    test_df[f'image_path_{i:02d}'] = shifted.groupby(scan_keys).ffill().fillna(last_in_scan)
test_df['image_paths'] = test_df[[f'image_path_{i:02d}' for i in range(channels)]].values.tolist()

test_df.image_paths[0]

['D:\\UW_Madison_GI_Tract_Image_Segmentation\\train\\case123\\case123_day20\\scans\\slice_0001_266_266_1.50_1.50.png',
 'D:\\UW_Madison_GI_Tract_Image_Segmentation\\train\\case123\\case123_day20\\scans\\slice_0003_266_266_1.50_1.50.png',
 'D:\\UW_Madison_GI_Tract_Image_Segmentation\\train\\case123\\case123_day20\\scans\\slice_0005_266_266_1.50_1.50.png']

In [10]:
# Inspect the first rows, now including the per-channel path columns and the image_paths list.
test_df.head()

Unnamed: 0,id,case,day,slice,image_path,height,width,image_path_00,image_path_01,image_path_02,image_paths
0,case123_day20_slice_0001,123,20,1,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,[D:\UW_Madison_GI_Tract_Image_Segmentation\tra...
1,case123_day20_slice_0002,123,20,2,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,[D:\UW_Madison_GI_Tract_Image_Segmentation\tra...
2,case123_day20_slice_0003,123,20,3,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,[D:\UW_Madison_GI_Tract_Image_Segmentation\tra...
3,case123_day20_slice_0004,123,20,4,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,[D:\UW_Madison_GI_Tract_Image_Segmentation\tra...
4,case123_day20_slice_0005,123,20,5,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,[D:\UW_Madison_GI_Tract_Image_Segmentation\tra...


### Masks

In [11]:
# Directory holding the mask files; list everything directly inside it.
MASK_PATH = 'D:\\UW_Madison_GI_Tract_Image_Segmentation\\masks\\masks'
mask_list = glob.glob(MASK_PATH + '\\*')

In [12]:
# One row per mask file; the id is the file name up to its first dot.
mask_df = pd.DataFrame(mask_list, columns=['mask_path'])
mask_df['id'] = mask_df['mask_path'].map(
    lambda mask_file: mask_file.rsplit('\\', 1)[-1].partition('.')[0]
)

In [13]:
# Number of mask files found (rows) and the two columns mask_path / id.
mask_df.shape

(38495, 2)

In [14]:
# Keep only the ids that have both a mask file and a row in test_df.
df = pd.merge(mask_df, test_df, how='inner', on='id')

In [15]:
# Sanity check: the row count should equal mask_df's if every mask id matched a row in test_df.
df.shape

(38495, 12)

In [16]:
# Sanity check: the number of distinct ids should equal the row count (no duplicated ids after the merge).
df['id'].nunique()

38495

In [17]:
# Persist the merged table as a space-separated file; the index is written as the first column.
df.to_csv('D:\\UW_Madison_GI_Tract_Image_Segmentation\\df.csv', sep=' ')

In [19]:
# Read the file back to check the round trip (first column restored as the index).
# NOTE(review): in the output below, image_paths appears to come back as the string
# form of the list rather than a list; parse it (e.g. ast.literal_eval) before use.
pd.read_csv('D:\\UW_Madison_GI_Tract_Image_Segmentation\\df.csv', sep=' ', index_col=0)

Unnamed: 0,mask_path,id,case,day,slice,image_path,height,width,image_path_00,image_path_01,image_path_02,image_paths
0,D:\UW_Madison_GI_Tract_Image_Segmentation\mask...,case101_day20_slice_0002,101,20,2,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,['D:\\UW_Madison_GI_Tract_Image_Segmentation\\...
1,D:\UW_Madison_GI_Tract_Image_Segmentation\mask...,case101_day20_slice_0003,101,20,3,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,['D:\\UW_Madison_GI_Tract_Image_Segmentation\\...
2,D:\UW_Madison_GI_Tract_Image_Segmentation\mask...,case101_day20_slice_0004,101,20,4,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,['D:\\UW_Madison_GI_Tract_Image_Segmentation\\...
3,D:\UW_Madison_GI_Tract_Image_Segmentation\mask...,case101_day20_slice_0005,101,20,5,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,['D:\\UW_Madison_GI_Tract_Image_Segmentation\\...
4,D:\UW_Madison_GI_Tract_Image_Segmentation\mask...,case101_day20_slice_0006,101,20,6,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,266,266,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,['D:\\UW_Madison_GI_Tract_Image_Segmentation\\...
...,...,...,...,...,...,...,...,...,...,...,...,...
38490,D:\UW_Madison_GI_Tract_Image_Segmentation\mask...,case9_day22_slice_0140,9,22,140,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,310,360,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,['D:\\UW_Madison_GI_Tract_Image_Segmentation\\...
38491,D:\UW_Madison_GI_Tract_Image_Segmentation\mask...,case9_day22_slice_0141,9,22,141,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,310,360,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,['D:\\UW_Madison_GI_Tract_Image_Segmentation\\...
38492,D:\UW_Madison_GI_Tract_Image_Segmentation\mask...,case9_day22_slice_0142,9,22,142,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,310,360,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,['D:\\UW_Madison_GI_Tract_Image_Segmentation\\...
38493,D:\UW_Madison_GI_Tract_Image_Segmentation\mask...,case9_day22_slice_0143,9,22,143,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,310,360,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,D:\UW_Madison_GI_Tract_Image_Segmentation\trai...,['D:\\UW_Madison_GI_Tract_Image_Segmentation\\...
