In [1]:
## 1. Loading Image Data
## 2. Crop the non-defect image data
## 3. Pair the defect + mask image data

In [2]:
import cv2
import os
import glob as gb
import shutil
import random
import string
import numpy as np

In [3]:
## 1-1. Collect the image file paths of each category into its own list
# Fixed seed so that the random sampling done later in the notebook is repeatable.
random.seed(42)

# Without recursive=True, `**` behaves like `*`, so this pattern matches PNG
# files exactly one sub-directory below ./NODefect_images.
nodefect_img_list = list(gb.iglob(r'./NODefect_images/**/*.png'))

# Defect images and their masks are looked up directly inside their folders.
defect_img_list = list(gb.iglob(r'./Defect_images/*.png'))
mask_img_list = list(gb.iglob(r'./Mask_images/*.png'))

In [4]:
## 1-2. Sort each path list lexicographically, then show its first entry
# glob gives no ordering guarantee; sorting makes the lists deterministic.
nodefect_img_list = sorted(nodefect_img_list)
defect_img_list = sorted(defect_img_list)
mask_img_list = sorted(mask_img_list)

# Print the first path of every list as a quick sanity check.
for path_list in (nodefect_img_list, defect_img_list, mask_img_list):
    print(path_list[0])

./NODefect_images/2306881-210020u/0001_000_05.png
./Defect_images/0001_002_00.png
./Mask_images/0001_002_00_mask.png


In [5]:
## 1-3. Image shape check
def _read_and_report(label, path):
    """Read `path` with cv2.imread, print `label` followed by the image shape
    without its last axis, and return the loaded array."""
    sample = cv2.imread(path)
    print(label, sample.shape[:-1])
    return sample


# First image of each list; the loaded samples stay available under the same names.
nodefect_image_sample = _read_and_report('NO Defect Image shape: ', nodefect_img_list[0])

defect_image_sample = _read_and_report('Defect Image shape: ', defect_img_list[0])

mask_image_sample = _read_and_report('Mask Image shape: ', mask_img_list[0])

NO Defect Image shape:  (256, 4096)
Defect Image shape:  (256, 4096)
Mask Image shape:  (256, 4096)


In [14]:
## 2-1. Crop square patches out of the non-defect images.
## Each window is h x h pixels and the start point advances by m = h // 2,
## so neighbouring windows overlap by half a window.

nodefect_out_dir = 'dataset/3/0'
# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs(nodefect_out_dir, exist_ok=True)

index = 0  # running counter, used as the output file name

for item in nodefect_img_list:
    image = cv2.imread(item)
    if image is None:
        # cv2.imread reports an unreadable file by returning None instead of raising.
        raise OSError('Could not read image: %s' % item)

    h, w = image.shape[:-1]
    m = h // 2  # stride between consecutive window start points

    # Start points 0, m, 2m, ... strictly below w - h.
    # NOTE(review): a window starting exactly at w - h would still fit inside the
    # image but is excluded by this bound; confirm that dropping the right-most
    # window is intended.
    for st_point in range(0, w - h, m):
        # Keep roughly 30% of the candidate windows: randint(0, 9) returns 0..9
        # and only 0, 1 and 2 pass the test.
        if random.randint(0, 9) < 3:
            crop = image[:, st_point:st_point + h, :]
            out_path = '%s/%04d.png' % (nodefect_out_dir, index)
            # imwrite signals failure through its return value only.
            if not cv2.imwrite(out_path, crop):
                raise OSError('Could not write image: %s' % out_path)
            index += 1

In [21]:
# Inspect the collected mask image paths (displays the whole list).
mask_img_list

['./Mask_images/0001_002_00_mask.png',
 './Mask_images/0002_002_00_mask.png',
 './Mask_images/0003_002_00_mask.png',
 './Mask_images/0004_002_01_mask.png',
 './Mask_images/0005_002_01_mask.png',
 './Mask_images/0006_002_01_mask.png',
 './Mask_images/0010_006_02_mask.png',
 './Mask_images/0011_006_02_mask.png',
 './Mask_images/0012_006_02_mask.png',
 './Mask_images/0013_006_02_mask.png',
 './Mask_images/0014_006_02_mask.png',
 './Mask_images/0015_006_02_mask.png',
 './Mask_images/0016_006_02_mask.png',
 './Mask_images/0017_002_02_mask.png',
 './Mask_images/0018_010_03_mask.png',
 './Mask_images/0019_016_03_mask.png',
 './Mask_images/0020_016_03_mask.png',
 './Mask_images/0021_016_03_mask.png',
 './Mask_images/0022_019_02_mask.png',
 './Mask_images/0023_019_02_mask.png',
 './Mask_images/0024_019_02_mask.png',
 './Mask_images/0025_019_02_mask.png',
 './Mask_images/0026_019_02_mask.png',
 './Mask_images/0027_019_02_mask.png',
 './Mask_images/0028_019_02_mask.png',
 './Mask_images/0029_019_

In [22]:
# Take the file name of the first mask path and split it on '_mask':
# the piece before '_mask' is the name stem, the piece after is the extension.
mask_img_list[0].split('/')[-1].split('_mask')

['0001_002_00', '.png']

In [23]:
# First underscore-separated field of the first mask file name.
mask_img_list[0].split('/')[-1].split('_')[0]

'0001'

In [20]:
# Inspect the collected defect image paths (displays the whole list).
defect_img_list

['./Defect_images/0001_002_00.png',
 './Defect_images/0002_002_00.png',
 './Defect_images/0003_002_00.png',
 './Defect_images/0004_002_01.png',
 './Defect_images/0005_002_01.png',
 './Defect_images/0006_002_01.png',
 './Defect_images/0010_006_02.png',
 './Defect_images/0011_006_02.png',
 './Defect_images/0012_006_02.png',
 './Defect_images/0013_006_02.png',
 './Defect_images/0014_006_02.png',
 './Defect_images/0015_006_02.png',
 './Defect_images/0016_006_02.png',
 './Defect_images/0017_002_02.png',
 './Defect_images/0018_010_03.png',
 './Defect_images/0019_016_03.png',
 './Defect_images/0020_016_03.png',
 './Defect_images/0021_016_03.png',
 './Defect_images/0022_019_02.png',
 './Defect_images/0023_019_02.png',
 './Defect_images/0024_019_02.png',
 './Defect_images/0025_019_02.png',
 './Defect_images/0026_019_02.png',
 './Defect_images/0027_019_02.png',
 './Defect_images/0028_019_02.png',
 './Defect_images/0029_019_02.png',
 './Defect_images/0030_019_02.png',
 './Defect_images/0031_019_0

In [24]:
# File name (last '/'-separated component) of the first defect image path.
defect_img_list[0].split('/')[-1]

'0001_002_00.png'

In [26]:
## Keep only defect/mask paths whose file-name keys agree, in matching order

def _defect_key(path):
    """File name of `path` split on '.', with the last piece (the extension) dropped."""
    return path.split('/')[-1].split('.')[:-1]


def _mask_key(path):
    """File name of `path` split on '_mask', with the last piece dropped."""
    return path.split('/')[-1].split('_mask')[:-1]


# The key of every mask is computed once and reused for each defect image.
keyed_masks = [(_mask_key(mask_path), mask_path) for mask_path in mask_img_list]

# For each defect image (in list order) collect every mask with an equal key.
matched_pairs = []
for defect_path in defect_img_list:
    wanted_key = _defect_key(defect_path)
    matched_pairs.extend(
        (defect_path, mask_path)
        for mask_key, mask_path in keyed_masks
        if mask_key == wanted_key
    )

defect_img_list_new = [defect_path for defect_path, _ in matched_pairs]
mask_img_list_new = [mask_path for _, mask_path in matched_pairs]

# From here on both lists are index-aligned: entry k of one belongs to entry k of the other.
defect_img_list = defect_img_list_new
mask_img_list = mask_img_list_new

In [32]:
## Slide an h x h window (stride h // 2) over every defect image and its mask,
## keeping the windows whose mask crop contains at least one non-zero pixel.
random.seed(42)
pair_img_list = []

for defect_path, mask_path in zip(defect_img_list, mask_img_list):
    defect_img = cv2.imread(defect_path)
    mask_img = cv2.imread(mask_path)

    height, width = defect_img.shape[:-1]
    stride = height // 2

    for step in range(width // stride):
        left = step * stride
        # Only start points strictly below width - height are used.
        if left >= width - height:
            continue

        columns = slice(left, left + height)
        crop_defect = defect_img[:, columns, :]
        crop_mask = mask_img[:, columns, :]

        # An all-zero (black) mask crop means nothing is marked in this window.
        if crop_mask.sum() > 0:
            pair_img_list.append((crop_defect, crop_mask))

In [33]:
## Write the defect crop of every (defect, mask) pair to dataset/3/1/<index>.png.
## Only the defect crop is written; the mask crop of each pair is not saved here.
defect_out_dir = 'dataset/3/1'
# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs(defect_out_dir, exist_ok=True)

for i, (crop_defect, _crop_mask) in enumerate(pair_img_list):
    out_path = '%s/%04d.png' % (defect_out_dir, i)
    # imwrite signals failure through its return value only; surface it instead
    # of silently ending up with an incomplete dataset.
    if not cv2.imwrite(out_path, crop_defect):
        raise OSError('Could not write image: %s' % out_path)