In [21]:
import os
import cv2
import random
import numpy as np
from matplotlib import pyplot as plt

%matplotlib inline

In [17]:
# Seed every RNG this notebook draws from: NumPy's global generator (file
# picks, Beta draws, shuffles) and the stdlib `random` module (used by
# random.sample / random.choice to pick the fixed boxes).
np.random.seed(42)
random.seed(42)

In [10]:
# Dataset locations. The defaults are the original local folders; set the
# WSSS4LUAD_SOURCE / WSSS4LUAD_DEST environment variables to run the notebook
# on another machine without editing this cell.
source_path = os.environ.get(
    'WSSS4LUAD_SOURCE',
    'C:/Users/hamma/Desktop/NCL/DS/11_FP_D/WSSS4LUAD-20220422T171928Z-001/WSSS4LUAD/1.training/1.training',
)
dest_path = os.environ.get(
    'WSSS4LUAD_DEST',
    'C:/Users/hamma/Desktop/NCL/DS/11_FP_D/WSSS4LUAD-20220422T171928Z-001/WSSS4LUAD/cutmix_augmentation',
)

In [11]:
# os.listdir returns names in arbitrary order; sort them so the seeded
# shuffles and random draws start from the same list order on every machine.
images = sorted(os.listdir(source_path))

In [6]:
# The file names embed the label text; keep one list per single-class label.
tumor = list(filter(lambda name: '[1, 0, 0]' in name, images))
stroma = list(filter(lambda name: '[0, 1, 0]' in name, images))
normal = list(filter(lambda name: '[0, 0, 1]' in name, images))

# One count per line: tumor, stroma, normal.
print(len(tumor), len(stroma), len(normal), sep='\n')

1181
1680
1832


In [7]:
# Shuffle each class list in place, in the order tumor, stroma, normal.
for class_files in (tumor, stroma, normal):
    np.random.shuffle(class_files)

In [8]:
def rand_bbox(size, lamb):
    """ Generate a random bounding box inside an image
    Args:
        - size: shape-like sequence; size[0] and size[1] are the extents of the
          first and second axes the box is drawn on (e.g. an image's .shape)
        - lamb: (lambda) mix ratio in [0, 1], sampled from Beta distribution;
          the box side is int(extent * sqrt(1 - lamb)) before clipping
    Returns:
        - (bbx1, bby1, bbx2, bby2): start/stop indices along the first axis
          (bbx1, bbx2) and the second axis (bby1, bby2), clipped to the extents
    """
    W = size[0]
    H = size[1]
    cut_rat = np.sqrt(1. - lamb)
    # Built-in int: the np.int alias was removed in NumPy 1.24.
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # Box centre, drawn uniformly over the image.
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    # Clip so a box centred near a border stays inside the image.
    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [18]:
def generate_cutmix_image(source, dest, image_list_1, image_list_2, num_images = 2000, beta = 1.0):
    """ Write CutMix images built from random pairs of source images
    For every output image one file name is drawn from each list, both images
    are resized to 224x224, their order is shuffled, and a random box of the
    second image is pasted into the first.
    Args:
        - source: directory that contains the input images
        - dest: output directory (created if it does not exist)
        - image_list_1: file names whose stem ends with a label like '[1, 0, 0]'
        - image_list_2: file names in the same format
        - num_images: number of images to write
        - beta: parameter of the Beta(beta, beta) distribution the cut ratio
          is sampled from
    Returns:
        - None; files are written to dest as '<4-digit index>-<label>.png',
          where <label> is the element-wise union of the two source labels
    Raises:
        - IOError: if an input image cannot be read or an output not written
    """
    # cv2.imwrite fails without raising when the folder is missing.
    os.makedirs(dest, exist_ok=True)

    for i in range(num_images):

        # One random file per list (list 1 is drawn first, then list 2).
        paths = [os.path.join(source, names[np.random.randint(len(names))])
                 for names in (image_list_1, image_list_2)]

        img = []
        labels = []
        for path in paths:
            raw = cv2.imread(path)
            if raw is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise IOError('Could not read image: {}'.format(path))
            img.append(cv2.resize(raw, (224, 224)))

            # The label is the trailing '[a, b, c]' of the file stem; parse it
            # from the last '[' instead of relying on fixed character offsets.
            stem = os.path.splitext(os.path.basename(path))[0]
            label_text = stem[stem.rindex('['):]
            labels.append([int(v) for v in label_text.strip('][').split(', ')])

        # Redraw boxes that are empty or cover the whole image: the result
        # would show a single class while being labelled with both.
        full_area = img[0].shape[0] * img[0].shape[1]
        while True:
            lam = np.random.beta(beta, beta)
            bbx1, bby1, bbx2, bby2 = rand_bbox(img[0].shape, lam)
            if 0 < (bbx2 - bbx1) * (bby2 - bby1) < full_area:
                break

        # Random choice of which image is the base and which supplies the patch.
        np.random.shuffle(img)
        img[0][bbx1:bbx2, bby1:bby2] = img[1][bbx1:bbx2, bby1:bby2]

        # Union of the labels; max (not sum) keeps entries in {0, 1} even when
        # both lists hold the same class.
        label = [max(flags) for flags in zip(*labels)]

        img_name = str(i).zfill(4) + '-' + str(label) + '.png'
        out_path = os.path.join(dest, img_name)
        if not cv2.imwrite(out_path, img[0]):
            raise IOError('Could not write image: {}'.format(out_path))

We need two kinds of mixed pairs: tumor + normal, and stroma + normal.

In [19]:
# Tumor + normal mixes with a randomly sized and positioned box.
generate_cutmix_image(
    source_path,
    dest_path,
    tumor,
    normal,
    num_images=2000,
    beta=1.0,
)

In [20]:
# Stroma + normal mixes with a randomly sized and positioned box.
generate_cutmix_image(
    source_path,
    dest_path,
    stroma,
    normal,
    num_images=2000,
    beta=1.0,
)

In [22]:
# Fixed paste regions on a 224x224 image. Each box is
# [start, stop, start, stop]: the first pair slices the first image axis
# (rows), the second pair slices the second axis (columns).
b1, b2 = [0, 112, 0, 112], [0, 112, 112, 224]          # top-left, top-right
b3, b4 = [112, 224, 0, 112], [112, 224, 112, 224]      # bottom-left, bottom-right
b5 = [56, 168, 56, 168]                                # centre

boxes = [b1, b2, b3, b4, b5]

In [27]:
# Scratch check: draw two distinct boxes from the fixed set and show them.
ba, bb = random.sample(boxes, k=2)
print(ba, bb)

[0, 112, 0, 112] [0, 112, 112, 224]


In [42]:
# Scratch check: draw a single box; the cell displays it as its output.
random.sample(boxes, k=1)[0]

[112, 224, 0, 112]

In [30]:
def generate_cutmix_image_three(source, dest, image_list_1, image_list_2, image_list_3, boxes, num_images = 2000, beta = 1.0):
    """ Write CutMix images built from random triples of source images
    For every output image one file name is drawn from each list, the three
    images are resized to 224x224 and shuffled, and two distinct boxes are
    pasted into the first image: one from the second, one from the third.
    Args:
        - source: directory that contains the input images
        - dest: output directory (created if it does not exist)
        - image_list_1: file names whose stem ends with a label like '[1, 0, 0]'
        - image_list_2: file names in the same format
        - image_list_3: file names in the same format
        - boxes: candidate regions, each [start, stop, start, stop] where the
          first pair slices image axis 0 and the second pair slices axis 1;
          at least two are required
        - num_images: number of images to write
        - beta: unused (the paste regions are fixed boxes); kept so existing
          calls keep working
    Returns:
        - None; files are written to dest as '<4-digit index>-<label>.png',
          where <label> is the element-wise union of the three source labels
    Raises:
        - IOError: if an input image cannot be read or an output not written
    """
    # cv2.imwrite fails without raising when the folder is missing.
    os.makedirs(dest, exist_ok=True)

    for i in range(num_images):

        # One random file per list, drawn in list order.
        paths = [os.path.join(source, names[np.random.randint(len(names))])
                 for names in (image_list_1, image_list_2, image_list_3)]

        img = []
        labels = []
        for path in paths:
            raw = cv2.imread(path)
            if raw is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise IOError('Could not read image: {}'.format(path))
            img.append(cv2.resize(raw, (224, 224)))

            # The label is the trailing '[a, b, c]' of the file stem; parse it
            # from the last '[' instead of relying on fixed character offsets.
            stem = os.path.splitext(os.path.basename(path))[0]
            label_text = stem[stem.rindex('['):]
            labels.append([int(v) for v in label_text.strip('][').split(', ')])

        # Two different boxes: one for each donor image.
        box_a, box_b = random.sample(boxes, 2)

        # Random choice of the base image (index 0) and the two donors.
        np.random.shuffle(img)

        img[0][box_a[0]:box_a[1], box_a[2]:box_a[3]] = img[1][box_a[0]:box_a[1], box_a[2]:box_a[3]]
        # Pasted second, so it wins wherever the two boxes overlap.
        img[0][box_b[0]:box_b[1], box_b[2]:box_b[3]] = img[2][box_b[0]:box_b[1], box_b[2]:box_b[3]]

        # Union of the labels; max (not sum) keeps entries in {0, 1} even when
        # several lists hold the same class.
        label = [max(flags) for flags in zip(*labels)]

        img_name = str(i).zfill(4) + '-' + str(label) + '.png'
        out_path = os.path.join(dest, img_name)
        if not cv2.imwrite(out_path, img[0]):
            raise IOError('Could not write image: {}'.format(out_path))

In [31]:
# Tumor + stroma + normal mixes using two of the fixed boxes per image.
generate_cutmix_image_three(
    source_path,
    dest_path,
    tumor,
    stroma,
    normal,
    boxes,
    num_images=2000,
    beta=1.0,
)

In [45]:
def generate_cutmix_image_two(source, dest, image_list_1, image_list_2, boxes, num_images = 2000, beta = 1.0):
    """ Write CutMix images built from random pairs, using a fixed box
    For every output image one file name is drawn from each list, both images
    are resized to 224x224 and shuffled, and one box chosen at random from
    `boxes` is copied from the second image into the first.
    Args:
        - source: directory that contains the input images
        - dest: output directory (created if it does not exist)
        - image_list_1: file names whose stem ends with a label like '[1, 0, 0]'
        - image_list_2: file names in the same format
        - boxes: candidate regions, each [start, stop, start, stop] where the
          first pair slices image axis 0 and the second pair slices axis 1
        - num_images: number of images to write
        - beta: unused (the paste region is a fixed box); kept so existing
          calls keep working
    Returns:
        - None; files are written to dest as '<4-digit index>-<label>.png',
          where <label> is the element-wise union of the two source labels
    Raises:
        - IOError: if an input image cannot be read or an output not written
    """
    # cv2.imwrite fails without raising when the folder is missing.
    os.makedirs(dest, exist_ok=True)

    for i in range(num_images):

        # One random file per list (list 1 is drawn first, then list 2).
        paths = [os.path.join(source, names[np.random.randint(len(names))])
                 for names in (image_list_1, image_list_2)]

        img = []
        labels = []
        for path in paths:
            raw = cv2.imread(path)
            if raw is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise IOError('Could not read image: {}'.format(path))
            img.append(cv2.resize(raw, (224, 224)))

            # The label is the trailing '[a, b, c]' of the file stem; parse it
            # from the last '[' instead of relying on fixed character offsets.
            stem = os.path.splitext(os.path.basename(path))[0]
            label_text = stem[stem.rindex('['):]
            labels.append([int(v) for v in label_text.strip('][').split(', ')])

        box = random.choice(boxes)

        # Random choice of which image is the base and which supplies the patch.
        np.random.shuffle(img)
        img[0][box[0]:box[1], box[2]:box[3]] = img[1][box[0]:box[1], box[2]:box[3]]

        # Union of the labels; max (not sum) keeps entries in {0, 1} even when
        # both lists hold the same class.
        label = [max(flags) for flags in zip(*labels)]

        img_name = str(i).zfill(4) + '-' + str(label) + '.png'
        out_path = os.path.join(dest, img_name)
        if not cv2.imwrite(out_path, img[0]):
            raise IOError('Could not write image: {}'.format(out_path))

In [46]:
# Tumor + normal mixes using one of the fixed boxes per image.
# NOTE(review): output names are '<index>-<label>.png' in dest_path, the same
# names the earlier generate_cutmix_image(tumor, normal) run produced, so those
# files are replaced — confirm this is intended.
generate_cutmix_image_two(
    source_path,
    dest_path,
    tumor,
    normal,
    boxes,
    num_images=2000,
    beta=1.0,
)

In [47]:
# Stroma + normal mixes using one of the fixed boxes per image.
# NOTE(review): output names are '<index>-<label>.png' in dest_path, the same
# names the earlier generate_cutmix_image(stroma, normal) run produced, so
# those files are replaced — confirm this is intended.
generate_cutmix_image_two(
    source_path,
    dest_path,
    stroma,
    normal,
    boxes,
    num_images=2000,
    beta=1.0,
)