In [1]:
import re
import numpy as np
import pandas as pd
import os
import PIL
import random
import shutil
import matplotlib.pyplot as plt
import PIL
import sys
import cv2
from tqdm import tqdm
from PIL import Image, ImageMath

from skimage.transform import rescale, resize, downscale_local_mean
from img_processing_256 import mask_img, rename
%matplotlib inline

In [2]:
def random_rotate_image_train(img):
    """Rotate ``img`` by a randomly drawn number of quarter turns.

    The turn count comes from ``np.random.randint(low=-3, high=3)``, i.e. an
    integer in -3..2 (the upper bound is exclusive), and is handed to
    ``np.rot90``.  Because ``np.rot90`` treats k and k +/- 4 as the same
    rotation, 1 and 2 quarter turns are drawn twice as often as 0 and 3.

    Parameters
    ----------
    img : array with at least two dimensions
        Image to rotate in the plane of its first two axes.

    Returns
    -------
    The rotated array as returned by ``np.rot90``.
    """
    quarter_turns = np.random.randint(low=-3, high=3)
    rotated = np.rot90(img, quarter_turns)
    return rotated

In [3]:
def random_flip_img_train(img):
    """Randomly mirror ``img`` and then apply a random quarter-turn rotation.

    Two independent fair coin tosses decide whether the image is mirrored
    along axis 1 (left/right) and along axis 0 (up/down).  The result is then
    passed to ``random_rotate_image_train``.

    Parameters
    ----------
    img : array with at least two dimensions

    Returns
    -------
    The augmented array.
    """
    # Draw both coins up front so the random-number sequence is consumed in a fixed order.
    mirror_left_right = np.random.binomial(1, 0.5)
    mirror_up_down = np.random.binomial(1, 0.5)

    augmented = img
    if mirror_left_right:
        augmented = np.flip(augmented, 1)
    if mirror_up_down:
        augmented = np.flip(augmented, 0)

    return random_rotate_image_train(augmented)

In [4]:
def crop_img(img, slice_size=512, tile_size=256):
    """Cut a random square out of ``img``, augment it and shrink it to a tile.

    A ``slice_size`` x ``slice_size`` window is taken at a random position,
    randomly flipped/rotated via ``random_flip_img_train``, converted to
    float32 and resized to ``tile_size`` x ``tile_size`` with bicubic
    interpolation.

    Parameters
    ----------
    img : 2-D array
        Source image; the final reshape assumes a single channel.
    slice_size : int, optional
        Side length of the window cut from ``img`` (default 512).
    tile_size : int, optional
        Side length of the returned tile (default 256).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(tile_size, tile_size, 1)`` on success, or
        ``np.array([0])`` when ``img`` is smaller than ``slice_size`` in
        either dimension (a message is printed in that case).
    """
    img_h = img.shape[0]
    img_w = img.shape[1]

    # make sure the image is big enough to use
    if (img_h < slice_size) or (img_w < slice_size):
        print("Error - image is wrong size!", img.shape)
        return np.array([0])

    # pick a random place to start the crop so that the crop will be the right size.
    # randint's upper bound is exclusive, hence the +1: without it an image that is
    # exactly slice_size wide/tall raises ValueError and the last valid offset is
    # never drawn.
    start_row = np.random.randint(low=0, high=(img_h - slice_size + 1))
    start_col = np.random.randint(low=0, high=(img_w - slice_size + 1))

    end_row = start_row + slice_size
    end_col = start_col + slice_size

    # crop the image and randomly flip/rotate it
    cropped_img = random_flip_img_train(img[start_row:end_row, start_col:end_col])

    # make sure the crop is square
    if cropped_img.shape[0] == cropped_img.shape[1]:
        # cv2.resize does not support every integer dtype with INTER_CUBIC, so convert first
        cropped_img = cropped_img.astype('float32')
        cropped_img = cv2.resize(cropped_img, dsize=(tile_size, tile_size), interpolation=cv2.INTER_CUBIC)
        return cropped_img.reshape((tile_size, tile_size, 1))

    # else repeat until the crop is square (defensive; the window above is always square)
    else:
        return crop_img(img, slice_size, tile_size)

In [5]:
def _padded_window(center, extent, limit):
    """Return ``(start, end)`` of an ``extent``-long window around ``center`` kept inside ``[0, limit]``."""
    start = max(int(center - extent // 2), 0)
    end = start + extent
    if end > limit:
        # Shift the window back inside the image; clamp at 0 so an image shorter
        # than the window cannot produce a negative (wrapping) slice start.
        end = limit
        start = max(limit - extent, 0)
    return start, end


def create_patches(mask_dir, img_dir, Lbls, size=256, debug=True):
    """Build augmented training patches around the ROI of every mask in ``mask_dir``.

    For each mask file the matching full image is loaded from ``img_dir``, its
    label is looked up in ``Lbls``, the ROI is located with ``mask_img``, a
    padded region around the ROI centre is extracted and randomly
    flipped/rotated, and three random crops of that region are taken with
    ``crop_img``.  Masks whose label is missing, whose mask size cannot be
    read, or whose centre is reported as (0, 0) are skipped with a message.

    Parameters
    ----------
    mask_dir : str
        Directory containing the mask files.
    img_dir : str
        Directory containing the full images.
    Lbls : pandas.DataFrame
        Label table with a ``'Class'`` column, indexed by the image file name
        with its first 14 characters removed.
    size : int, optional
        Height and width a crop must have to be kept (default 256).
    debug : bool, optional
        When truthy, print per-mask diagnostics; also forwarded to ``mask_img``
        as ``output``.

    Returns
    -------
    tuple
        ``(patches, labels, file_names, roi_sizes)``: three numpy arrays with
        one entry per kept patch, and a list with the larger mask dimension of
        every mask whose size could be read.
    """
    patch_list = []
    Lbl_list = []
    FN_list = []
    roi_sizes = []
    full_size = 512
    crops_per_roi = 3

    # tqdm renders the progress bar for the loop
    for mask in tqdm(os.listdir(mask_dir)):
        # Drop the last 6 characters of the mask name to get the image name.
        # assumes mask names end in "_<single digit>.png" -- TODO confirm
        base_img_file = mask[:-6] + ".png"
        # Label index is the image name without its first 14 characters
        # (e.g. the "Calc-Training_" prefix).
        compare = base_img_file[14:]

        try:
            Lbl = Lbls.loc[compare]['Class']
        except KeyError:
            print("Error LabelNotFound", base_img_file)
            continue

        # np.array copies the pixel data, so the file can be closed right away
        with PIL.Image.open(img_dir + "/" + base_img_file) as full_img:
            full_img_arr = np.array(full_img)[:, :]

        ctr_row, ctr_col, too_big, full_img_arr, mask_size = mask_img(mask_dir + "/" + mask, full_img_arr, half=False,
                                                                         output=debug)
        img_h, img_w = full_img_arr.shape

        try:
            mask_H = mask_size[0]
            mask_W = mask_size[1]
            roi_size = np.max([mask_H, mask_W])
        except Exception:
            # Without a usable mask size there is nothing to crop; skip instead of
            # carrying on with undefined or stale values from the previous mask.
            print("Mask Size Error:", mask_size, "for", mask)
            continue
        if debug:
            print("Mask", mask, " Height:", mask_H, "Width:", mask_W)

        # Record roi size for DDSM image crop
        roi_sizes.append(roi_size)

        if (ctr_row == 0) and (ctr_col == 0):
            print("Error, skipping", mask)
            continue

        # Extract the ROI depending on its size: ROIs smaller than a slice get a
        # window of at least 1.4 * full_size, larger ones at least 1.15 * full_size,
        # so that crop_img always has room for its random crop.
        if roi_size < full_size:
            if debug:
                print("ROI small", mask)
            pad_factor = 1.4
        else:
            if debug:
                print("ROI Big", mask)
            pad_factor = 1.15

        adj_mask_H = int(np.max([full_size * pad_factor, mask_H]))
        adj_mask_W = int(np.max([full_size * pad_factor, mask_W]))
        start_row, end_row = _padded_window(ctr_row, adj_mask_H, img_h)
        start_col, end_col = _padded_window(ctr_col, adj_mask_W, img_w)

        # extract the ROI and randomly flip it
        roi_img = random_flip_img_train(full_img_arr[start_row:end_row, start_col:end_col])

        for _ in range(crops_per_roi):
            patch = crop_img(roi_img)
            # crop_img returns a 1-element array on failure; keep only full-size tiles
            if patch.ndim >= 2 and patch.shape[0] == size and patch.shape[1] == size:
                patch_list.append(patch)
                Lbl_list.append(Lbl)
                FN_list.append(base_img_file)

    return np.array(patch_list), np.array(Lbl_list), np.array(FN_list), roi_sizes

In [6]:
# Load the training labels and keep a single row per index value.
# NOTE(review): read_pickle can execute code embedded in the file -- only load trusted pickles.
train_labels = pd.read_pickle("label old ver/train_label.pkl")
train_labels = train_labels.assign(IMAGE_NAME2=train_labels.index).drop_duplicates(['IMAGE_NAME2'])

## use a copy on the local drive to make testing faster
mask_dir = "D:/mammography/mask/train_png/Calc"
img_dir = "D:/mammography/full/train_png/Calc"

# Build the calcification training patches, labels, file names and ROI sizes.
(train_calc_patch,
 train_calc_Lbl,
 train_calc_FN,
 train_calc_roi_size) = create_patches(mask_dir, img_dir, Lbls=train_labels, debug=True)

  0%|                                                                                         | 0/1546 [00:00<?, ?it/s]

Trimming borders D:/mammography/mask/train_png/Calc/Calc-Training_P_00005_RIGHT_CC_1.png
Mask Calc-Training_P_00005_RIGHT_CC_1.png  Height: 549 Width: 637
ROI Big Calc-Training_P_00005_RIGHT_CC_1.png





error: OpenCV(4.2.0) C:\projects\opencv-python\opencv\modules\imgproc\src\resize.cpp:3923: error: (-215:Assertion failed) func != 0 in function 'cv::hal::resize'


In [None]:
#Calc_training
# Load the training labels and keep a single row per index value.
train_labels = pd.read_pickle("label old ver/train_label.pkl")
train_labels['IMAGE_NAME2'] = train_labels.index
train_labels = train_labels.drop_duplicates(['IMAGE_NAME2'])

## use a copy on the local drive to make testing faster
mask_dir = "D:/mammography/mask/train_png/Calc"
img_dir = "D:/mammography/full/train_png/Calc"

# Not consumed by create_patches, which builds and returns its own ROI-size list.
roi_sizes = []

# create_patches takes (mask_dir, img_dir, Lbls, ...); passing roi_sizes as an extra
# leading positional argument raised "TypeError: got multiple values for argument 'Lbls'".
train_calc_patch, train_calc_Lbl, train_calc_FN, train_calc_roi_size = \
    create_patches(mask_dir, img_dir, Lbls=train_labels, debug=True)