In [None]:
import json
import pandas as pd 
import numpy as np 
from glob import glob
import os
import matplotlib.pyplot as plt
import shutil
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from PIL import Image
from tqdm.auto import tqdm
import roifile
import cv2

import imgaug.augmenters as iaa

In [None]:
# Paths of every entry directly under the standard-frame data directory.
# NOTE(review): folder_path is not referenced by any other cell visible here —
# confirm it is still needed.
folder_path=glob("../../data/ddh_standard_frame_data/*")

def createDirectory(directory):
    """Create ``directory`` (and any missing parent directories).

    Calling it on a directory that already exists is a no-op. Failures are
    reported on stdout instead of being raised, so callers continue on a
    best-effort basis.

    Args:
        directory (str): Path of the directory to create.
    """
    try:
        # exist_ok=True makes the call idempotent and removes the window
        # between a separate existence check and the creation itself.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print("Error: Failed to create the directory.")
        
def expand2square(pil_img, background_color):
    """Pad a Pillow image so that it becomes square.

    The shorter dimension is padded equally on both sides (integer division,
    so the extra pixel, if any, goes to the far side) and the original image
    is pasted onto the new canvas.

    Args:
        pil_img (PIL.Image.Image): Image to be padded.
        background_color: Fill colour handed to ``Image.new`` for the padding.

    Returns:
        PIL.Image.Image: ``pil_img`` itself when it is already square,
        otherwise a new square image in the same mode whose side equals the
        larger of the input's width and height.
    """
    width, height = pil_img.size
    if width == height:
        return pil_img

    side = max(width, height)
    canvas = Image.new(pil_img.mode, (side, side), background_color)
    # Exactly one of the two offsets is non-zero: the one along the shorter axis.
    canvas.paste(pil_img, ((side - width) // 2, (side - height) // 2))
    return canvas



In [None]:
# Unpack every case's ROI archives (seg_label/*.zip) into segLabel/<name>/,
# where <name> is the archive's file name up to its first underscore.
def archive_case_name(archive_path):
    """Return the archive's base name (no extension) up to its first underscore.

    The whole base name is returned when it contains no underscore.

    Args:
        archive_path (str): Path of the archive file.

    Returns:
        str: Name of the sub-directory the archive is unpacked into.
    """
    stem = os.path.splitext(os.path.basename(archive_path))[0]
    # str.find() returns -1 when there is no underscore, and slicing with
    # [:-1] would silently drop the last character; partition() does not.
    return stem.partition('_')[0]


# Case folders are numbered 1..20.
for i in range(1,21):
    file_list=glob('../../data/pre_DDH/'+str(i)+'/seg_label/*.zip')
    output_dir = "../../data/pre_DDH/"+str(i)+"/segLabel/"
    for archive_path in file_list:
        target_dir = output_dir + archive_case_name(archive_path)
        createDirectory(target_dir)
        # The format is passed directly rather than through a global named
        # `format`, which would shadow the builtin in every later cell.
        shutil.unpack_archive(archive_path, target_dir, "zip")
        

In [None]:
# polygon to mask
# For every entry under <case>/segLabel/, rasterise ROI files 1..3 into
# <case>/raw_seg_mask/<name>/<j>.png (255 inside the polygon, 0 outside),
# sized like the matching <case>/Standard/<name>.bmp image.
roi_list=glob('../../data/pre_DDH/**/segLabel/**')
image_list=[f.replace('/segLabel', '/Standard')+'.bmp' for f in roi_list]
mask_list=[f.replace('/segLabel', '/raw_seg_mask') for f in roi_list]


def load_numbered_roi(roi_dir, index):
    """Load ``<index>.roi`` from ``roi_dir``, falling back to ``<index>-1.roi``.

    Args:
        roi_dir (str): Directory holding the ROI files.
        index (int): ROI number.

    Returns:
        The loaded ``roifile.ImagejRoi``, or None when neither file can be read.
    """
    for file_name in (str(index)+'.roi', str(index)+'-1.roi'):
        try:
            return roifile.ImagejRoi.fromfile(roi_dir+'/'+file_name)
        except Exception:
            # Missing or unreadable file: try the next candidate name.
            # Unlike a bare `except:`, this lets KeyboardInterrupt through.
            continue
    return None


for i in tqdm(range(len(image_list))):
    createDirectory(mask_list[i])
    mask_shape = None
    for j in range(1,4):
        roi = load_numbered_roi(roi_list[i], j)
        if roi is None:
            # Stop at the first ROI number that cannot be loaded.
            break

        if mask_shape is None:
            # Only the image size is needed: read it once per image and close
            # the file straight away instead of reopening it for every ROI.
            with Image.open(image_list[i]) as image:
                mask_shape = (image.height, image.width)

        # Shift the ROI's integer coordinates by its left/top offset. A new
        # int32 array is built (cv2.fillPoly needs 32-bit integer points) so
        # the ROI's own array is not modified in place.
        polygon = np.asarray(roi.integer_coordinates, dtype=np.int32) + np.array([roi.left, roi.top], dtype=np.int32)
        # 8-bit single-channel mask, which is what the PNG writer stores.
        mask = np.zeros(mask_shape, dtype=np.uint8)
        cv2.fillPoly(mask, [polygon], 255)
        cv2.imwrite(mask_list[i]+'/'+str(j)+'.png', mask)


In [172]:
#classification dataset create
# Pad every Standard / Not_Standard frame to a square, resize it to
# image_size x image_size, save it as PNG under classificationDDH/dataset/<case>/
# and record (case, file name, class) in classification_dataset.csv.
# Class 1 = Standard, class 0 = Not_Standard.
standard_image_list=glob('../../data/pre_DDH/**/Standard/*.bmp')
not_standard_image_list=glob('../../data/pre_DDH/**/Not_Standard/*.bmp')
padding_standard_image_list=[
    f.replace('/pre_DDH', '/classificationDDH/dataset').replace('.bmp', '.png').replace('/Standard', '')
    for f in standard_image_list
]
padding_not_standard_image_list=[
    f.replace('/pre_DDH', '/classificationDDH/dataset').replace('.bmp', '.png').replace('/Not_Standard', '')
    for f in not_standard_image_list
]
image_size=512
case_list=[]
file_list=[]
class_list=[]
# One pass per class; the two passes differ only in their path lists and label.
for source_paths, target_paths, label in (
    (standard_image_list, padding_standard_image_list, 1),
    (not_standard_image_list, padding_not_standard_image_list, 0),
):
    for source_path, target_path in tqdm(zip(source_paths, target_paths), total=len(source_paths)):
        # The case name is the folder two levels above the source image.
        case=os.path.basename(os.path.dirname(os.path.dirname(source_path)))
        file_name=os.path.basename(target_path)
        createDirectory(os.path.dirname(target_path))
        # Pad and resize while the source file is open, then release the handle;
        # resize() returns a new image that no longer depends on the file.
        with Image.open(source_path) as source_image:
            image=expand2square(source_image,(0,0,0)).resize((image_size,image_size))
        image.save(target_path)
        case_list.append(case)
        file_list.append(file_name)
        class_list.append(label)
classification_dataset=pd.DataFrame({'case':case_list,'file name':file_list,'standard class':class_list})
# Make sure the CSV's folder exists even when no image was found.
createDirectory('../../data/classificationDDH')
classification_dataset.to_csv('../../data/classificationDDH/classification_dataset.csv',index=False)

  0%|          | 0/4628 [00:00<?, ?it/s]

  0%|          | 0/34721 [00:00<?, ?it/s]

In [None]:
#classification augmentation dataset create
# Pad every Standard / Not_Standard frame to a square, resize it to
# image_size x image_size, save it as PNG under classificationDDH/aug_dataset/<case>/
# and record (case, file name, class) in aug_classification_dataset.csv.
# Class 1 = Standard, class 0 = Not_Standard.
# NOTE(review): despite its name this cell applies no augmentation — it writes
# the same padded/resized images as the plain dataset cell, only to a different
# folder. Confirm whether an augmentation step is missing here.
standard_image_list=glob('../../data/pre_DDH/**/Standard/*.bmp')
not_standard_image_list=glob('../../data/pre_DDH/**/Not_Standard/*.bmp')
padding_standard_image_list=[
    f.replace('/pre_DDH', '/classificationDDH/aug_dataset').replace('.bmp', '.png').replace('/Standard', '')
    for f in standard_image_list
]
padding_not_standard_image_list=[
    f.replace('/pre_DDH', '/classificationDDH/aug_dataset').replace('.bmp', '.png').replace('/Not_Standard', '')
    for f in not_standard_image_list
]
image_size=512
case_list=[]
file_list=[]
class_list=[]
# One pass per class; the two passes differ only in their path lists and label.
for source_paths, target_paths, label in (
    (standard_image_list, padding_standard_image_list, 1),
    (not_standard_image_list, padding_not_standard_image_list, 0),
):
    for source_path, target_path in tqdm(zip(source_paths, target_paths), total=len(source_paths)):
        # The case name is the folder two levels above the source image.
        case=os.path.basename(os.path.dirname(os.path.dirname(source_path)))
        file_name=os.path.basename(target_path)
        createDirectory(os.path.dirname(target_path))
        # Pad and resize while the source file is open, then release the handle;
        # resize() returns a new image that no longer depends on the file.
        with Image.open(source_path) as source_image:
            image=expand2square(source_image,(0,0,0)).resize((image_size,image_size))
        image.save(target_path)
        case_list.append(case)
        file_list.append(file_name)
        class_list.append(label)
aug_classification_dataset=pd.DataFrame({'case':case_list,'file name':file_list,'standard class':class_list})
# Make sure the CSV's folder exists even when no image was found.
createDirectory('../../data/classificationDDH')
aug_classification_dataset.to_csv('../../data/classificationDDH/aug_classification_dataset.csv',index=False)

In [None]:
# Preview only the first few output paths; displaying the whole list would
# flood the notebook with one line per image.
padding_not_standard_image_list[:5]

In [171]:
#segmentation dataset create
# For every Standard frame: pad to a square, resize to image_size x image_size
# and save it under segmentationDDH/dataset/<case>/image/; do the same for its
# masks 1..3 (read from <case>/raw_seg_mask/<name>/) under .../<case>/mask/<name>/;
# then record (case, file name, mask folder name) in segmentation_dataset.csv.
image_list=glob('../../data/pre_DDH/**/Standard/*.bmp')
mask_list=[f.replace('.bmp', '').replace('/Standard', '/raw_seg_mask') for f in image_list]
padding_image_list=[
    f.replace('/pre_DDH', '/segmentationDDH/dataset').replace('.bmp', '.png').replace('/Standard', '/image')
    for f in image_list
]
padding_mask_list=[
    f.replace('/pre_DDH', '/segmentationDDH/dataset').replace('/raw_seg_mask', '/mask')
    for f in mask_list
]

image_size=512
case_list=[]
file_list=[]
# Holds the mask folder name of each image (the file name without extension).
class_list=[]
for i in tqdm(range(len(image_list))):
    # The case name is the folder two levels above the source image.
    case=os.path.basename(os.path.dirname(os.path.dirname(image_list[i])))
    file_name=os.path.basename(padding_image_list[i])
    createDirectory(os.path.dirname(padding_image_list[i]))
    createDirectory(padding_mask_list[i])
    # Pad and resize while the source file is open, then release the handle;
    # resize() returns a new image that no longer depends on the file.
    with Image.open(image_list[i]) as source_image:
        image=expand2square(source_image,(0,0,0)).resize((image_size,image_size))
    image.save(padding_image_list[i])
    case_list.append(case)
    file_list.append(file_name)
    class_list.append(os.path.splitext(file_name)[0])
    # All three masks are required: a missing file makes Image.open raise.
    for j in range(1,4):
        with Image.open(mask_list[i]+'/'+str(j)+'.png') as source_mask:
            # Nearest-neighbour keeps the mask's original pixel values; an
            # interpolating filter would introduce intermediate grey values
            # along the label boundaries.
            mask=expand2square(source_mask,0).resize((image_size,image_size),Image.NEAREST)
        mask.save(padding_mask_list[i]+'/'+str(j)+'.png')
segmentation_dataset=pd.DataFrame({'case':case_list,'file name':file_list,'standard mask':class_list})
# Make sure the CSV's folder exists even when no image was found.
createDirectory('../../data/segmentationDDH')
segmentation_dataset.to_csv('../../data/segmentationDDH/segmentation_dataset.csv',index=False)

  0%|          | 0/4628 [00:00<?, ?it/s]

In [169]:
# NOTE(review): debugging leftover — shows the parent folder of one mask
# directory, using `i` and `padding_mask_list` left over from the segmentation
# cell's loop; it cannot run before that cell.
os.path.dirname(padding_mask_list[i])


'../../data/segmenationDDH/dataset/2/mask'