In [2]:
import os
import random
import string

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from PIL import Image
import cv2 as cv
import shutil

This notebook generates the training images in the format used by the Nucleus model.
It takes random 600x600 crops of a wafer image and creates all the associated masks (one image file per mask).
It works on both the original wafer images and the artificial images.

In [3]:
# Column layout of the segmentation files: four corner points, each stored as an (x, y) pair
point_labels = [f"point_{corner}" for corner in range(1, 5)]
index = pd.MultiIndex.from_product([point_labels, ['x', 'y']])

In [197]:
# Inputs for a single artificial wafer: the image itself plus its two segmentation files
file = "../augmented_dataset/artificial_images/artificial_image_0.tiff"
wafer = Image.open(file)

# Both segmentation files are headerless, split on tabs or commas, and share the same columns
segmentation_csv_options = dict(sep="\t|,", header=None, names=index, engine='python')
seg_tissues = pd.read_csv("../augmented_dataset/artificial_images/seg_tissues_artif_0.txt", **segmentation_csv_options)
seg_mag = pd.read_csv("../augmented_dataset/artificial_images/seg_mag_artif_0.txt", **segmentation_csv_options)

# Side length, in pixels, of each square section cropped from the wafer
section_size = 600

# How many sections will be generated
number_of_sections = 60

In [198]:
# Preview the first rows of the tissue segmentation to check how the columns were parsed
seg_tissues.head()

Unnamed: 0_level_0,point_1,point_1,point_2,point_2,point_3,point_3,point_4,point_4
Unnamed: 0_level_1,x,y,x,y,x,y,x,y
0,1840,1155,1885,1188,1799,1288,1769,1248
1,2281,750,2222,745,2241,613,2290,631
2,2246,2627,2194,2598,2263,2484,2303,2520
3,1548,2668,1550,2724,1417,2719,1428,2668
4,935,1054,992,1046,1003,1179,953,1172


In [190]:
def generate_mask(image, points):
    """Draw a filled convex polygon on a black canvas the size of `image`.

    Parameters
    ----------
    image : object exposing a PIL-style ``size`` attribute, i.e. ``(width, height)``
        Only its dimensions are used; the pixel data is never read.
    points : array-like of shape (n_points, 2)
        Polygon vertices as (x, y) pixel coordinates in the frame of `image`.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(height, width)``: 255 inside the polygon,
        0 everywhere else.
    """
    # PIL reports size as (width, height), whereas NumPy/OpenCV arrays are
    # indexed (rows, cols) == (height, width). Mixing the two up transposes
    # the canvas for every non-square image.
    width, height = image.size

    # np.zeros already yields an all-black canvas
    img_mask = np.zeros((height, width), dtype=np.uint8)

    # fillConvexPoly only accepts 32-bit integer points; casting here makes the
    # function independent of the integer type used by the caller
    polygon = np.asarray(points, dtype=np.int32)
    cv.fillConvexPoly(img_mask, polygon, 255)

    return img_mask

In [None]:
# Height and width (in pixels) of each square section cropped from the wafer image
section_size = 600

# Number of random crops attempted per wafer image; the generation loop only
# keeps a crop when it contains at least 4 sections
number_of_sections = 60

In [217]:

# Number of artificial wafer images to process (artificial_image_0 ... artificial_image_19)
number_of_artificial_images = 20

# Column layout of the segmentation files: four corner points, each stored as an (x, y) pair.
# Defined once: the loops below no longer reuse the name `index` as a loop variable.
index = pd.MultiIndex.from_tuples([('point_1', 'x'), ('point_1', 'y'), ('point_2', 'x'), ('point_2', 'y'),
                                   ('point_3', 'x'), ('point_3', 'y'), ('point_4', 'x'), ('point_4', 'y')])


def _sections_in_crop(segmentation, start_x, start_y, size, min_points=2):
    """Return the row labels of the quadrilaterals having at least `min_points` corners inside the crop.

    `segmentation` holds one quadrilateral per row as x1, y1, ..., x4, y4.
    A corner counts as inside when start <= coordinate <= start + size on both axes.
    """
    corners = segmentation.to_numpy().reshape(len(segmentation), 4, 2)
    xs, ys = corners[..., 0], corners[..., 1]
    inside = ((start_x <= xs) & (xs <= start_x + size)
              & (start_y <= ys) & (ys <= start_y + size))
    return set(segmentation.index[inside.sum(axis=1) >= min_points])


def _cropped_masks(image, segmentation, section_indices, crop_box):
    """Build one cropped mask per section; return None as soon as one of them is fully black.

    Returns a dict mapping each section index to its cropped PIL mask image.
    """
    masks = {}
    for section_index in section_indices:
        # int32 is the point type expected by cv.fillConvexPoly
        vertices = np.array(segmentation.loc[[section_index]], 'int32').reshape((4, 2))
        full_mask = generate_mask(image, vertices)
        cropped_mask = Image.fromarray(np.uint8(full_mask)).crop(crop_box)

        # Kept from the original code, whose author reported overlapping masks
        # without this pause. NOTE(review): confirm it is still needed.
        cv.waitKey(1)

        # Test the *cropped* mask: that is the image written to disk, and it can
        # be black even when the full-size mask is not
        if not np.asarray(cropped_mask).any():
            return None
        masks[section_index] = cropped_mask
    return masks


for index_img_artif in range(number_of_artificial_images):

    # Wafer file to crop
    file = f"../augmented_dataset/artificial_images/artificial_image_{index_img_artif}.tiff"
    wafer = Image.open(file)
    seg_tissues = pd.read_csv(f"../augmented_dataset/artificial_images/seg_tissues_artif_{index_img_artif}.txt", sep="\t|,", header=None, names=index, engine='python')
    seg_mag = pd.read_csv(f"../augmented_dataset/artificial_images/seg_mag_artif_{index_img_artif}.txt", sep="\t|,", header=None, names=index, engine='python')

    for i in range(1, number_of_sections + 1):

        # random crop coordinates (top-left point of the cropped area)
        start_x = random.randint(0, wafer.size[0] - section_size)
        start_y = random.randint(0, wafer.size[1] - section_size)
        crop_box = (start_x, start_y, start_x + section_size, start_y + section_size)

        # cropping the wafer image
        cropped_image = wafer.crop(crop_box)

        # sections whose tissue part / magnetic part lies (at least partly) within the cropped area
        tissue_indicies = _sections_in_crop(seg_tissues, start_x, start_y, section_size)
        mag_indicies = _sections_in_crop(seg_mag, start_x, start_y, section_size)

        # keeping only sections whose two parts are BOTH within the cropped area.
        # (`set(a) or set(b)` would simply return the first non-empty set.)
        section_indices = sorted(tissue_indicies & mag_indicies)

        # a crop is only worth keeping when it shows at least 4 complete sections
        if len(section_indices) < 4:
            continue

        crop_name = f"artif_{index_img_artif}_crop{i}"
        crop_folder = f"../augmented_dataset/stage1/{crop_name}"

        # build every mask in memory first, so nothing is written for a rejected crop
        tissue_masks = _cropped_masks(wafer, seg_tissues, section_indices, crop_box)
        magnetic_masks = None
        if tissue_masks is not None:
            magnetic_masks = _cropped_masks(wafer, seg_mag, section_indices, crop_box)

        if tissue_masks is None or magnetic_masks is None:
            # a black mask rejects the whole crop; as before, its folder must not
            # exist afterwards (it may be left over from an earlier run)
            shutil.rmtree(crop_folder, ignore_errors=True)
            continue

        # creating directories to store results.
        # NOTE: exist_ok=True reuses an existing folder, so files written there by an
        # earlier run are not cleared.
        image_folder = f"{crop_folder}/image"
        tissue_masks_folder = f"{crop_folder}/tissue_masks"
        magnetic_masks_folder = f"{crop_folder}/magnetic_masks"
        for folder in (image_folder, tissue_masks_folder, magnetic_masks_folder):
            os.makedirs(folder, exist_ok=True)

        # saving the cropped masks (one file per section and per part)
        for section_index, tissue_mask in tissue_masks.items():
            tissue_mask.save(f"{tissue_masks_folder}/{section_index}.tif")
        for section_index, magnetic_mask in magnetic_masks.items():
            magnetic_mask.save(f"{magnetic_masks_folder}/{section_index}.tif")

        # saving the cropped image (f-string, so the wafer number really ends up in the name)
        cropped_image.save(f"{image_folder}/{crop_name}.tif")
    