In [64]:
import re
import numpy as np
import pandas as pd
import os
import PIL
import random
import shutil
import matplotlib.pyplot as plt
import PIL
import sys
import cv2
from tqdm import tqdm
from PIL import Image, ImageMath

from skimage.transform import rescale, resize, downscale_local_mean
#from img_processing_256 import rename, random_flip_img_train, crop_img
%matplotlib inline

In [82]:
def remove_margins(image_arr, margin=20):
	"""
	function to trim plxels off all sides of an image
	"""
	h, w = image_arr.shape
	new_image = image_arr[margin:h-margin,margin:w-margin]
	return new_image

In [75]:
def mask_img2(mask_path, full_image_arr, slice_size=512, return_size=False, half=True, output=True):

	mask = PIL.Image.open(mask_path)    
	if half:
		h, w = mask.size
		new_size = ( h // 2, w // 2)
		mask.thumbnail(new_size, PIL.Image.ANTIALIAS)

	mask_arr = np.array(mask)
	mask_arr = mask_arr[:,:]
		
	if np.sum(np.sum(full_image_arr >= 225)) > 20000:
		full_image_arr = remove_margins(full_image_arr)
		mask_arr = remove_margins(mask_arr)
		if output:
			print("Trimming borders", mask_path)
			
	# The maks size must be same as the full image size
	if mask_arr.shape != full_image_arr.shape:
		# see if the ratios are the same
		mask_ratio = mask_arr.shape[0] / mask_arr.shape[1]
		image_ratio = full_image_arr.shape[0] / full_image_arr.shape[1]
		
		if abs(mask_ratio - image_ratio) <=  1e-03:
			if output:
				print("Mishaped mask, resizing mask", mask_path)
			
			# reshape the mask to match the image
			#mask_arr = imresize(mask_arr, full_image_arr.shape)
			mask_arr = np.array(Image.fromarray(mask_arr).resize(full_image_arr.shape))
		else:
			if output:
				print("Mask shape:", mask_arr.shape)
				print("Image shape:", full_image_arr.shape)
			print("Mask shape doesn't match image!", mask_path)
			return 0, 0, False, full_image_arr, 0
	
	# find the borders
	mask_mask = mask_arr == 255

	# check whether each row or column have a white pixel
	cols = np.sum(mask_mask, axis=0)
	rows = np.sum(mask_mask, axis=1)

	# check corners
	first_col = np.argmax(cols > 0)
	last_col = mask_arr.shape[1] - np.argmax(np.flip(cols, axis=0) > 0)
	center_col = int((first_col + last_col) / 2)

	first_row = np.argmax(rows > 0)
	last_row = mask_arr.shape[0] - np.argmax(np.flip(rows, axis=0) > 0)
	center_row = int((first_row + last_row) / 2)
	
	col_size = last_col - first_col
	row_size = last_row - first_row
	
	mask_size = [row_size, col_size]
	
	# When a mask size is bigger than a slice
	too_big = False
	
	if (last_col - first_col > slice_size + 30) or (last_row - first_row > slice_size + 30):
		too_big = True
	  
	return center_row, center_col, too_big, full_image_arr, mask_size

In [124]:
def create_patches(mask_dir, img_dir, Lbls, size=256, debug=True):
    patch_list = []
    Lbl_list = []
    FN_list = []
    global roi_sizes
    full_size = 512
    masks = os.listdir(mask_dir)
    counter = 0
    if debug is None:
        progress(counter, len(masks), 'WORKING')
    for mask in tqdm(masks):
        counter += 1
        if debug is None:
            progress(counter, len(masks), mask)    
        base_img_file = mask[:-6] + ".png"
        full_img = PIL.Image.open(img_dir+"/"+base_img_file)
#        try:
#            full_img = PIL.Image.open(img_dir + "/" + base_img_file + '.png')
#        except:
#            try:
#                full_img = PIL.Image.open(img_dir + "/" + base_img_file + '000000.png')
#            except:
#                try:
#                    full_img = PIL.Image.open(img_dir + "/" + base_img_file + '000001.png')
#                except:
#                    print("Error FileNotFound:", base_img_file)
#                    continue
        compare = base_img_file[10:]
        try:
            Lbl = Lbls.loc[compare]['Class']
        except:
            print("Error LabelNotFound", base_img_file)
            continue
            
        full_img_arr = np.array(full_img)[:,:]
        ctr_row, ctr_col, too_big, full_img_arr, mask_size = mask_img2(mask_dir + "/" + mask,full_img_arr, half=False,
                                                                         output=debug)
        img_h, img_w = full_img_arr.shape
        try:
            mask_H = mask_size[0]
            mask_W = mask_size[1]
            roi_size = np.max([mask_H, mask_W])
            if debug:
                print("Mask", mask, " Height:", mask_H, "Width:", mask_W)
        except:
            print("Mask Size Error:", mask_size, "for", mask)
        # Record roi size for DDSM image crop
        roi_sizes = []
        roi_sizes.append(roi_size)
        if (ctr_row == 0) and (ctr_col == 0):
            print("Error, skipping", mask)
            continue
        """
        Extract the ROI depending on it's size
        If the ROI is smaller than a slice extract it with some padding
        """
        if roi_size < full_size:
            if debug:
                print("ROI small", mask)
            ## Make sure the size of the ROI is at least as big as a tile will be
            adj_mask_H = int(np.max([full_size * 1.4, mask_H]))
            adj_mask_W = int(np.max([full_size * 1.4, mask_W]))
            ## Extract the full ROI with 20% padding on either side
            start_row = int(np.max([ctr_row - (adj_mask_H // 2), 0]))
            end_row = start_row + adj_mask_H
            if end_row > img_h:
                end_row = img_h
                start_row = img_h - adj_mask_H
            start_col = int(np.max([ctr_col - (adj_mask_W // 2), 0]))
            end_col = start_col + adj_mask_W
            if end_col > img_w:
                end_col = img_w
                start_col = img_w - adj_mask_W

            # extract the ROI and randomly flip it
            roi_img = random_flip_img_train(full_img_arr[start_row:end_row, start_col:end_col])
        # else extract the ROI with less padding
        else:
            if debug:
                print("ROI Big", mask)
            # padding for the random cropping
            adj_mask_H = int(np.max([full_size * 1.15, mask_H]))
            adj_mask_W = int(np.max([full_size * 1.15, mask_W]))
            start_row = np.max([ctr_row - (adj_mask_H // 2), 0])
            end_row = start_row + adj_mask_H
            if end_row > img_h:
                end_row = img_h
                start_row = img_h - adj_mask_H
            start_col = np.max([ctr_col - (adj_mask_W // 2), 0])
            end_col = start_col + adj_mask_W
            if end_col > img_w:
                end_col = img_w
                start_col = img_w - adj_mask_W
            # extract the ROI and randomly flip it
            roi_img = random_flip_img_train(full_img_arr[start_row:end_row, start_col:end_col])
              
        patch_1 = crop_img(roi_img)
        patch_2 = crop_img(roi_img)
        patch_3 = crop_img(roi_img)
         
        if (patch_1.shape[0] == size) and (patch_1.shape[1] == size):
            patch_list.append(patch_1)
            Lbl_list.append(Lbl)
            FN_list.append(base_img_file + ".png")
                
        if (patch_2.shape[0] == size) and (patch_2.shape[1] == size):
            patch_list.append(patch_2)
            Lbl_list.append(Lbl)
            FN_list.append(base_img_file + ".png")
        
        if (patch_3.shape[0] == size) and (patch_2.shape[1] == size):
            patch_list.append(patch_3)
            Lbl_list.append(Lbl)
            FN_list.append(base_img_file + ".png")
                
    return np.array(patch_list), np.array(Lbl_list), np.array(FN_list), roi_sizes

In [105]:
train_labels = pd.read_pickle("train_label.pkl")
train_labels['IMAGE_NAME2'] = train_labels.index
#train_labels = train_labels.drop_duplicates(['IMAGE_NAME2'])#중복제거

In [108]:
img_dir = "D:/mammography/cropped_images/train"

train_mass_patch, train_mass_Lbl, train_mass_FN, train_mass_roi_size = \
        create_patches(mask_dir, img_dir, Lbls=train_labels, debug=True)

  0%|                                                                                          | 0/326 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: 'D:/mammography/cropped_images/train/Calc-Test_P_00038_LEFT_CC.png'

In [125]:
#Calc_test
test_labels = pd.read_pickle("label old ver/test_label.pkl")
test_labels['IMAGE_NAME2'] = test_labels.index
test_labels = test_labels.drop_duplicates(['IMAGE_NAME2'])

## use a copy on the local drive to make testing faster
mask_dir = "D:/mammography/cropped_images/test_png/Calc"
img_dir = "D:/mammography/full/test_png/Calc"

roi_sizes = []

test_mass_patch, test_mass_Lbl, test_mass_FN, test_mass_roi_size = \
    create_patches(mask_dir, img_dir, Lbls=test_labels, debug=True)


  0%|                                                                                          | 0/326 [00:00<?, ?it/s]

(249, 265)
Trimming borders D:/mammography/cropped_images/test_png/Calc/Calc-Test_P_00038_LEFT_CC_1.png
Mask shape: (209, 225)
Image shape: (4576, 2976)
Mask shape doesn't match image! D:/mammography/cropped_images/test_png/Calc/Calc-Test_P_00038_LEFT_CC_1.png
Mask Size Error: 0 for Calc-Test_P_00038_LEFT_CC_1.png





UnboundLocalError: local variable 'roi_size' referenced before assignment

In [42]:
print(test_labels.loc['P_00038_LEFT_CC.png'])

Patient_ID                       P_00038
Breast_Density                         2
Side_L_R                            LEFT
Image View                            CC
Abnormality_ID                         1
Abnormality_Type           calcification
Mass_Shape          PUNCTATE-PLEOMORPHIC
Mass_Margins                   CLUSTERED
Assessment                             4
Pathology                         BENIGN
Subtlety                               2
Class               BENIGN_calcification
IMAGE_NAME2          P_00038_LEFT_CC.png
Name: P_00038_LEFT_CC.png, dtype: object
