In [None]:
# /// Basic libraries
import numpy as np
import pandas as pd
import random

# /// Path files and xml manipulation
from os.path import isfile, join
import xmltodict #library that treats xmls as json files
import os
from os import listdir #os library that lists all the directories

# /// Image processing
import cv2 #opencv
from pathlib import Path

# /// Visualization
import matplotlib.pyplot as plt
import seaborn as sns



In [None]:
def directory_creation(dirname):
    '''
    Create the directory `dirname`, together with any missing parent directories.

    The function can be executed multiple times: when the directory already
    exists nothing is created and a notice is printed instead of raising.

    Parameters
    ----------
    dirname : str
        Path of the directory to create (a trailing "/" is accepted).
    '''
    try:
        # makedirs (unlike mkdir) also builds missing parents, so a nested
        # path such as "train_data/0/" works even if "train_data/" is absent.
        os.makedirs(dirname)
    except FileExistsError:
        # The final path component is already present; keep the run going.
        print("Directory " + dirname + " already exists.")

In [None]:
def ImageNamesExtraction(images_dir='../ih-final-project/data/medical-masks-dataset/images'):
    """
    Return the file names of all images found under `images_dir`.

    The directory tree is walked recursively and every file whose name does
    not end in ".xml" (compared case-insensitively) is treated as an image.

    Parameters
    ----------
    images_dir : str, optional
        Root folder to scan. Defaults to the dataset's image folder.

    Returns
    -------
    list of str
        Bare file names (no directory part), sorted alphabetically so the
        result does not depend on the filesystem's listing order. The list
        is empty when the folder is missing or contains no matching files.
    """
    img_names = []
    for _dirname, _subdirs, filenames in os.walk(images_dir):
        for filename in filenames:
            # Annotation files may sit next to the images; leave them out
            # whatever the letter case of their extension.
            if not filename.lower().endswith('.xml'):
                img_names.append(filename)

    # os.walk yields entries in arbitrary order; sorting keeps downstream
    # numbering reproducible between runs and machines.
    return sorted(img_names)

In [None]:
# Root folder for the cropped training images, with one sub-folder per label
# (0 = good, 1 = bad).
dir_name = 'train_data/'
label_0_dir, label_1_dir = dir_name + "0/", dir_name + "1/"

# Separate folder named for models (presumably saved model files; it is not
# used in this cell).
models_dir = "models/"

# Create the folders in order, parent first, so each sub-folder finds its root.
for _new_dir in (dir_name, label_0_dir, label_1_dir, models_dir):
    directory_creation(_new_dir)

In [None]:
def getting_path(img_name):
    """
    Build the paths of an image and of its XML label file.

    The label file shares the image's base name: whatever extension the
    image has (".jpg", ".jpeg", ".PNG", ".webp", ...) is replaced by ".xml".

    Parameters
    ----------
    img_name : str
        File name of the image inside the dataset's "images/" folder.

    Returns
    -------
    tuple of (str, str)
        `(img_path, lbl_path)`: the image path under "images/" and the
        matching annotation path under "labels/".
    """
    path_master = '../ih-final-project/data/medical-masks-dataset/'
    img_path = path_master + 'images/' + img_name

    # splitext removes the real extension regardless of its length or letter
    # case, instead of assuming it is exactly 3 or 4 characters long.
    base_name, _extension = os.path.splitext(img_name)
    lbl_path = path_master + 'labels/' + base_name + '.xml'

    return img_path, lbl_path

In [None]:
def images_cropping(img_name):
    """
    Crop every annotated bounding box out of one dataset image.

    The image and annotation paths come from `getting_path`; the annotation
    is read with `parse_xml`.

    Parameters
    ----------
    img_name : str
        File name of the image, as accepted by `getting_path`.

    Returns
    -------
    list
        One `[croped_img, label_num]` pair per annotated object.
        `croped_img` is the RGB pixel region inside the box and `label_num`
        is 0 for "good", 1 for "bad" and 2 for any other label name.

    Raises
    ------
    OSError
        If OpenCV cannot load the image (missing, corrupt or unsupported file).
    """
    img_path, lbl_path = getting_path(img_name)

    # cv2.imread signals failure by returning None rather than raising, so
    # check here to report the offending file instead of a cryptic cvtColor error.
    image = cv2.imread(img_path)
    if image is None:
        raise OSError("Could not read image: " + img_path)

    # OpenCV loads pixels as BGR; the crops are handed back as RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # parse_xml also returns the image size, which is not needed here.
    labels, _size = parse_xml(lbl_path)

    # Label names mapped to class numbers; anything else falls back to 2.
    label_codes = {"good": 0, "bad": 1}

    cropedImgLabels = []
    for name, bndbox in labels:
        # bndbox[0] holds the (x, y) start corner and bndbox[1] the (x, y) end
        # corner. Clamp at 0 because a negative index would make the slice
        # count from the opposite edge and return the wrong region.
        x_start, y_start = max(0, bndbox[0][0]), max(0, bndbox[0][1])
        x_end, y_end = max(0, bndbox[1][0]), max(0, bndbox[1][1])

        # Keep only the interior of the bounding box (rows = y, columns = x).
        croped_img = image[y_start:y_end, x_start:x_end]

        cropedImgLabels.append([croped_img, label_codes.get(name, 2)])

    return cropedImgLabels

In [None]:
# Save every "good"/"bad" crop as <counter>.jpg in the folder of its label.
# NOTE(review): img_names is expected to come from an earlier cell
# (presumably the result of ImageNamesExtraction()) -- confirm.
label_0_counter = 0 # GOOD
label_1_counter = 0 # BAD

for img_name in img_names:
    for img, label in images_cropping(img_name):

        # Crops with any other label are not part of the training set, and a
        # zero-area crop (degenerate bounding box) cannot be encoded as JPEG.
        if label not in (0, 1) or img.size == 0:
            continue

        # images_cropping returns RGB crops, but cv2.imwrite expects BGR;
        # convert back so the saved files do not have red and blue swapped.
        bgr_img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        if label == 0:
            croped_img_name = str(label_0_counter) + ".jpg"
            cv2.imwrite(label_0_dir + croped_img_name, bgr_img)
            label_0_counter += 1
        else:
            croped_img_name = str(label_1_counter) + ".jpg"
            cv2.imwrite(label_1_dir + croped_img_name, bgr_img)
            label_1_counter += 1