# 3. Data Preprocessing

## 3.1 Import Libraries

In [1]:
import os, sys, random, gc, json 
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from io import StringIO

import cv2
from PIL import Image

## 3.2 Pre-processing

### 3.2.1 Pre-processor Class

In [3]:
class ImagePreprocessor:

    def convert_to_RGB(self, input_file_path, output_save_path):
        try:
            with Image.open(input_file_path) as img:
                # Convert to RGBA if the image has transparency
                if img.mode == 'P':
                    img = img.convert('RGBA')
    
                # Flatten to RGB
                if img.mode == 'RGBA':
                    # Create an RGB image with the same color(white background)
                    background = Image.new('RGB', img.size, (255, 255, 255))
                    # 3 is the alpha channel
                    background.paste(img, mask=img.split()[3])
                    img = background

                if img.mode not in ['P', 'RGBA']:
                    img = img.convert('RGB')
                
                img.save(output_save_path)
        
        except Exception as e:
            print(f"Error converting image at {input_file_path}: {e}")

    def color_segmentation_YCrCb(self, input_file_path, output_save_path, min_YCrCb, max_YCrCb):
        try:
            img = cv2.imread(input_file_path)
            
            # Convert to YCrCb color space
            ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)

            # Find region with skin related tone in YCrCb image
            skinRegion = cv2.inRange(ycrcb, min_YCrCb, max_YCrCb)

            # Apply bitwise on original image and mask
            skin = cv2.bitwise_and(img, img, mask = skinRegion)

            # Save image
            cv2.imwrite(output_save_path, skin)
        
        except Exception as e:
            print(f"Error in masking skin pixels at {input_file_path}: {e}")

    def color_segmentation_hsv(self, input_file_path, output_save_path, min_HSV, max_HSV):
        try:
            img = cv2.imread(input_file_path)
            
            # Convert to HSV color space
            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    
            # Define color range in HSV color space
            min_HSV = np.array(min_HSV, np.uint8)
            max_HSV = np.array(max_HSV, np.uint8)

            # Find region with skin related tone hues in HSV image
            colorRegion = cv2.inRange(hsv, min_HSV, max_HSV)
    
            # Apply bitwise on the original image and mask
            colorSegmented = cv2.bitwise_and(img, img, mask=colorRegion)
    
            # Save image
            cv2.imwrite(output_save_path, colorSegmented)
    
        except Exception as e:
            print(f"Error in color segmentation at {input_file_path}: {e}")

imagePreprocessor = ImagePreprocessor()

#### 3.2.1.1 Convert to RGB

In [4]:
input_folder = '../02. Datasets/Images'

# Iterate over each sub-folder in input_folder
for sub_folder_name in sorted(os.listdir(input_folder)):
    sub_folder_path = f"{input_folder}/{sub_folder_name}"
    if os.path.isdir(sub_folder_path):
        # Iterate over each file in the sub-folder
        for file_name in sorted(os.listdir(sub_folder_path)):
            file_path = f"{sub_folder_path}/{file_name}"
            # Pre-process: Convert raw image files to RGB
            imagePreprocessor.convert_to_RGB(file_path , file_path)

#### 3.2.1.2 Color Segmentation- YCrCb

In [8]:
input_folder = '../02. Datasets/Images'
output_folder = '../02. Datasets/Images_Preprocessed'

# Define color range in YCrCb color space
min_YCrCb = np.array([0, 130, 90], np.uint8)
max_YCrCb = np.array([255, 183, 130], np.uint8)

# Iterate over each sub-folder in input_folder
for sub_folder_name in sorted(os.listdir(input_folder)):
    sub_folder_path = f"{input_folder}/{sub_folder_name}"
    if os.path.isdir(sub_folder_path):
        # Iterate over each file in the sub-folder
        for file_name in sorted(os.listdir(sub_folder_path)):
            file_path = f"{sub_folder_path}/{file_name}"
            output_file_path = os.path.join(output_folder, sub_folder_name, file_name)
            os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
            # Pre-process: Mask skin pixels using color segmentation YCrCb method
            imagePreprocessor.color_segmentation_YCrCb(file_path , output_file_path, min_YCrCb, max_YCrCb)

Error in masking skin pixels at ../02. Datasets/Images/Acne Conglobata/junk: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'

Error in masking skin pixels at ../02. Datasets/Images/Acne Keloid/junk: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'

Error in masking skin pixels at ../02. Datasets/Images/Dermatitis Herpetiformis/junk: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'

Error in masking skin pixels at ../02. Datasets/Images/Dermatitis, Atopic/junk: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'

Error in masking skin pixels at ../02. Datasets/Images/Dermatitis, Seborrheic/junk: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion f

#### 3.2.1.3 Color Segmentation- HSV

In [12]:
input_folder = '../02. Datasets/Images'
output_folder = '../02. Datasets/Images_Preprocessed'

# Define hues in HSV color space
min_HSV = [0, 50, 50]
max_HSV = [35, 150, 255]

# Iterate over each sub-folder in input_folder
for sub_folder_name in sorted(os.listdir(input_folder)):
    sub_folder_path = f"{input_folder}/{sub_folder_name}"
    if os.path.isdir(sub_folder_path):
        # Iterate over each file in the sub-folder
        for file_name in sorted(os.listdir(sub_folder_path)):
            file_path = f"{sub_folder_path}/{file_name}"
            output_file_path = os.path.join(output_folder, sub_folder_name, file_name)
            os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
            # Pre-process: Mask skin pixels using color segmentation hsv method
            imagePreprocessor.color_segmentation_hsv(file_path , output_file_path, min_HSV, max_HSV)

Error in color segmentation at ../02. Datasets/Images/Acne Conglobata/junk: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'

Error in color segmentation at ../02. Datasets/Images/Acne Keloid/junk: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'

Error in color segmentation at ../02. Datasets/Images/Dermatitis Herpetiformis/junk: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'

Error in color segmentation at ../02. Datasets/Images/Dermatitis, Atopic/junk: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'

Error in color segmentation at ../02. Datasets/Images/Dermatitis, Seborrheic/junk: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed