In [1]:
import os
# Display the notebook's current working directory as the cell result.
os.getcwd()

'C:\\Users\\Admin\\Documents\\SKRIPSI\\Kode\\Model'

# Informasi Dataset

In [2]:
# Paths to the raw dataset folders: the original images (ORI) and their
# ground-truth images (GT).
ori_folder = 'C:\\Users\\Admin\\Documents\\SKRIPSI\\DATASET\\ORI'
gt_folder = 'C:\\Users\\Admin\\Documents\\SKRIPSI\\DATASET\\GT'

def get_file_formats(folder_path):
  """
  Return the distinct file extensions found directly inside a folder.

  Only regular files are inspected. Subdirectories and files without an
  extension (e.g. ``README`` or ``.gitignore``) are skipped, so a bare file
  name is never reported as a format.

  Args:
    folder_path: Directory to scan (not recursive).

  Returns:
    Sorted list of lower-case extensions without the leading dot,
    e.g. ``['jpg', 'png']``. Empty list for a folder with no such files.
  """
  formats = set()
  for filename in os.listdir(folder_path):
    if not os.path.isfile(os.path.join(folder_path, filename)):
      continue  # a subdirectory is not a data file
    # splitext returns '' for names without an extension, unlike split('.')[-1],
    # which would hand back the whole file name.
    suffix = os.path.splitext(filename)[1][1:]
    if suffix:
      formats.add(suffix.lower())
  # Sorted so the printed result is stable between runs.
  return sorted(formats)

# Number of entries in each dataset folder (everything os.listdir returns is counted).
print(f"\nJumlah data di folder original: {len(os.listdir(ori_folder))}")
print(f"Jumlah data di folder ground_truth: {len(os.listdir(gt_folder))}")

# File formats present in each dataset folder.
print(f"\nFormat file di folder original: {get_file_formats(ori_folder)}")
print(f"Format file di folder ground_truth: {get_file_formats(gt_folder)}")


Jumlah data di folder original: 115
Jumlah data di folder ground_truth: 115

Format file di folder original: ['png']
Format file di folder ground_truth: ['png']


In [3]:
import os
import numpy as np
from PIL import Image

# Fungsi untuk mendapatkan informasi detail dari gambar
def get_image_info(image_path):
    """Collect basic metadata for a single image file.

    Args:
        image_path: Path of the image to open with PIL.

    Returns:
        dict with the file format, the (width, height) size, the colour mode,
        the pixel count (width * height) and the shape of the image once
        converted to a NumPy array.
    """
    with Image.open(image_path) as picture:
        width, height = picture.size
        # The array shape is evaluated last, so the header attributes are read
        # before the pixel data is converted.
        return {
            "Format": picture.format,
            "Ukuran (Width x Height)": picture.size,
            "Mode Warna": picture.mode,
            "Total Pixel": width * height,
            "Shape": np.array(picture).shape,
        }

# File extensions (lower-case) treated as images when listing a dataset folder.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')


def list_image_files(folder_path):
    """Return the image file names in `folder_path`, sorted by name.

    The extension check is case-insensitive, so a file such as `IMG.PNG`
    is listed as well.
    """
    return sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    )


def print_image_details(folder_path, file_names, heading):
    """Print the get_image_info() report of every listed file under `heading`."""
    for file_name in file_names:
        info = get_image_info(os.path.join(folder_path, file_name))
        print(f"\n{heading}: {file_name}")
        for key, value in info.items():
            print(f"{key}: {value}")


# Detailed report for every image in the original folder, ordered by file name.
image_files = list_image_files(ori_folder)
print_image_details(ori_folder, image_files, "Informasi Gambar")

# Detailed report for every image in the ground-truth folder, ordered by file name.
label_files = list_image_files(gt_folder)
print_image_details(gt_folder, label_files, "Informasi Gambar Ground Truth")


Informasi Gambar: Img_001.png
Format: PNG
Ukuran (Width x Height): (600, 600)
Mode Warna: RGB
Total Pixel: 360000
Shape: (600, 600, 3)

Informasi Gambar: Img_002.png
Format: PNG
Ukuran (Width x Height): (800, 450)
Mode Warna: RGB
Total Pixel: 360000
Shape: (450, 800, 3)

Informasi Gambar: Img_003.png
Format: PNG
Ukuran (Width x Height): (1942, 1456)
Mode Warna: RGB
Total Pixel: 2827552
Shape: (1456, 1942, 3)

Informasi Gambar: Img_004.png
Format: PNG
Ukuran (Width x Height): (600, 600)
Mode Warna: RGB
Total Pixel: 360000
Shape: (600, 600, 3)

Informasi Gambar: Img_005.png
Format: PNG
Ukuran (Width x Height): (600, 600)
Mode Warna: RGB
Total Pixel: 360000
Shape: (600, 600, 3)

Informasi Gambar: Img_006.png
Format: PNG
Ukuran (Width x Height): (2304, 1536)
Mode Warna: RGB
Total Pixel: 3538944
Shape: (1536, 2304, 3)

Informasi Gambar: Img_007.png
Format: PNG
Ukuran (Width x Height): (428, 288)
Mode Warna: RGB
Total Pixel: 123264
Shape: (288, 428, 3)

Informasi Gambar: Img_008.png
Format:

# Preprocessing Data

In [4]:
import os
import shutil
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Raw dataset locations.
base_dataset = "C:\\Users\\Admin\\Documents\\SKRIPSI\\DATASET"
ori_folder = os.path.join(base_dataset, "ORI")
gt_folder = os.path.join(base_dataset, "GT")

# Available target sizes as (width, height).
low = (128, 128)
standard = (256, 256)
high = (512, 512)

# Size every image is resized to in this run; pick one of the presets above.
target = standard

# Name of the per-size subfolder, derived from `target` so that a different
# size is never written into the folder of another size.
# NOTE(review): the inspection cell further down hard-codes "...\\256\\...";
# update it as well if `target` is changed.
size_dir = str(target[0])

# Output folders for the preprocessed images.
base_preprocessed = os.path.join(base_dataset, "PREPROCESSING")
preprocessed_ori_data = os.path.join(base_preprocessed, size_dir, "prep_ori")
preprocessed_gt_data = os.path.join(base_preprocessed, size_dir, "prep_gt")

# Temporary folders holding the renamed copies (removed at the end of the cell).
temp_ori_folder = os.path.join(base_dataset, size_dir, "temp_ori")
temp_gt_folder = os.path.join(base_dataset, size_dir, "temp_gt")

# Function to clear the output folder before processing
def clear_folder(folder_path):
    """Delete every regular file directly inside `folder_path`.

    A missing folder is ignored. Subdirectories are left untouched, and a file
    that cannot be removed is reported on stdout instead of aborting the run.
    """
    if not os.path.exists(folder_path):
        return

    for entry_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, entry_name)
        try:
            if not os.path.isfile(file_path):
                continue  # only plain files are removed
            os.remove(file_path)
        except Exception as e:
            print(f"Error removing file {file_path}: {e}")

# Start the run from existing, empty folders. The temporary folders are cleared
# too: if an earlier run stopped before its cleanup, its leftover copies would
# otherwise be preprocessed again together with the new ones.
for work_dir in (preprocessed_ori_data, preprocessed_gt_data,
                 temp_ori_folder, temp_gt_folder):
    clear_folder(work_dir)                # no-op when the folder does not exist yet
    os.makedirs(work_dir, exist_ok=True)

# Function to normalize images
def normalize_image(image):
    """Round-trip an image through a [0, 1] float representation.

    The pixel values are scaled to float32 in [0, 1] and immediately scaled
    back to 8-bit, so the returned image carries [0, 255] data again; the
    normalized floats themselves are not returned.

    The values are rounded to the nearest integer before the cast. A plain
    ``astype(np.uint8)`` truncates, which would lower a pixel by one whenever
    the float32 round trip lands just below the original integer.

    Args:
        image: Image to process; the caller passes an RGB PIL image.

    Returns:
        PIL.Image built from the uint8 array.
    """
    normalized = np.array(image).astype(np.float32) / 255.0  # values in [0, 1]
    restored = np.clip(np.rint(normalized * 255.0), 0, 255).astype(np.uint8)
    return Image.fromarray(restored)


def copy_and_rename(src_folder, dst_folder, prefix):
    """Copy the dataset files of `src_folder` into `dst_folder` under sequential names.

    Only regular files whose name contains "Img" are copied. They are taken in
    sorted order and named ``{prefix}_{NNN}{ext}`` with a 1-based, zero-padded
    counter; the source files are left untouched.

    The counter advances only for files that are actually copied. A stray entry
    in one folder (a hidden file, a subdirectory) therefore cannot shift the
    numbering and break the pairing between two folders processed with
    different prefixes.

    Args:
        src_folder: Folder containing the source files.
        dst_folder: Existing folder that receives the renamed copies.
        prefix: Text placed before the running number, e.g. ``'ORI'``.
    """
    # Filter first, number afterwards.
    selected = [
        name for name in sorted(os.listdir(src_folder))
        if "Img" in name and os.path.isfile(os.path.join(src_folder, name))
    ]

    for i, filename in enumerate(selected, start=1):
        ext = os.path.splitext(filename)[1]
        new_filename = f"{prefix}_{i:03d}{ext}"

        # copy2 also carries over the file metadata (timestamps).
        shutil.copy2(os.path.join(src_folder, filename),
                     os.path.join(dst_folder, new_filename))
        print(f'Copied and renamed: {filename} -> {new_filename}')

# Function to preprocess images (both originals and masks)
def preprocess_images(src_folder, dst_folder, is_mask=False, size=None):
    """Resize every image of `src_folder` and save it as PNG in `dst_folder`.

    Files are handled in sorted order; only names ending in .png, .jpg or
    .jpeg (case-insensitive) are processed. Every image is converted to RGB
    first. Original images are resized with Lanczos resampling and passed
    through normalize_image(). Masks are resized with nearest-neighbour
    resampling, so no in-between values are interpolated, and then passed
    through enhance_contrast_for_mask() and threshold_colors().

    A file that fails is reported on stdout and skipped, so one bad image does
    not stop the whole batch.

    Args:
        src_folder: Folder containing the images to process.
        dst_folder: Existing folder that receives the PNG results.
        is_mask: True to use the ground-truth (mask) pipeline.
        size: Target (width, height). Defaults to the module-level `target`.

    Returns:
        List with the paths of the images that were written.
    """
    if size is None:
        size = target  # module-level default, resolved when the function is called

    processed_images = []

    for filename in sorted(os.listdir(src_folder)):  # sorted: consistent file order
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        img_path = os.path.join(src_folder, filename)
        try:
            with Image.open(img_path) as img:
                img = img.convert("RGB")  # both pipelines work on RGB data
                if is_mask:
                    img = img.resize(size, Image.Resampling.NEAREST)
                    img = enhance_contrast_for_mask(img)
                    img = threshold_colors(img)
                else:
                    img = img.resize(size, Image.Resampling.LANCZOS)
                    img = normalize_image(img)

                # Keep the base name, always store as PNG.
                new_filename = os.path.splitext(filename)[0] + '.png'
                out_path = os.path.join(dst_folder, new_filename)
                img.save(out_path, format='PNG')
                processed_images.append(out_path)

        except Exception as e:
            print(f"Error processing file {filename}: {e}")

    return processed_images

# Function to enhance contrast for ground truth images without changing class values
def enhance_contrast_for_mask(image):
    """Return a copy of the ground-truth mask with its pixel values unchanged.

    Histogram equalization is deliberately not applied: it remaps pixel
    values and would therefore alter the class colours of the mask. The
    function is a pass-through step in the mask pipeline; the unused
    grayscale/equalization work and the colour-by-colour rebuild, which
    reproduced the input exactly, have been dropped.

    Args:
        image: Mask image (the caller passes an RGB PIL image).

    Returns:
        PIL.Image built from a copy of the input pixel data.
    """
    return Image.fromarray(np.array(image))

# Function to threshold colors in the mask
def threshold_colors(image, threshold=180):
    """Binarize every channel value of a ground-truth mask.

    Each value strictly greater than `threshold` becomes 255 and every other
    value becomes 0. The comparison is applied element-wise, i.e. to each
    channel of each pixel separately.

    Args:
        image: Mask image to threshold.
        threshold: Cut-off value; defaults to 180.

    Returns:
        PIL.Image holding only the values 0 and 255 (uint8).
    """
    image_array = np.array(image)
    binary = np.where(image_array > threshold, 255, 0).astype(np.uint8)
    return Image.fromarray(binary)

try:
    # Copy the originals and the ground truths into the temporary folders
    # under matching sequential names (ORI_001 <-> GT_001, ...).
    copy_and_rename(ori_folder, temp_ori_folder, prefix='ORI')
    copy_and_rename(gt_folder, temp_gt_folder, prefix='GT')

    # Preprocess the renamed copies into the output folders.
    preprocess_images(temp_ori_folder, preprocessed_ori_data, is_mask=False)
    preprocess_images(temp_gt_folder, preprocessed_gt_data, is_mask=True)
finally:
    # Remove the temporary copies even when a step above raised, so a failed
    # run does not leave them behind for the next one.
    shutil.rmtree(temp_ori_folder)
    shutil.rmtree(temp_gt_folder)

# Only reached when every step succeeded.
print("Renaming and preprocessing completed.")

Copied and renamed: Img_001.png -> ORI_001.png
Copied and renamed: Img_002.png -> ORI_002.png
Copied and renamed: Img_003.png -> ORI_003.png
Copied and renamed: Img_004.png -> ORI_004.png
Copied and renamed: Img_005.png -> ORI_005.png
Copied and renamed: Img_006.png -> ORI_006.png
Copied and renamed: Img_007.png -> ORI_007.png
Copied and renamed: Img_008.png -> ORI_008.png
Copied and renamed: Img_009.png -> ORI_009.png
Copied and renamed: Img_010.png -> ORI_010.png
Copied and renamed: Img_011.png -> ORI_011.png
Copied and renamed: Img_012.png -> ORI_012.png
Copied and renamed: Img_013.png -> ORI_013.png
Copied and renamed: Img_014.png -> ORI_014.png
Copied and renamed: Img_015.png -> ORI_015.png
Copied and renamed: Img_016.png -> ORI_016.png
Copied and renamed: Img_017.png -> ORI_017.png
Copied and renamed: Img_018.png -> ORI_018.png
Copied and renamed: Img_019.png -> ORI_019.png
Copied and renamed: Img_020.png -> ORI_020.png
Copied and renamed: Img_021.png -> ORI_021.png
Copied and re

In [5]:
import os
from PIL import Image

# Make sure the dataset paths are correct.
# NOTE: from here on `ori_folder` / `gt_folder` point at the preprocessed
# 256 folders, no longer at the raw ORI / GT folders assigned in earlier cells.
ori_folder = "C:\\Users\\Admin\\Documents\\SKRIPSI\\DATASET\\PREPROCESSING\\256\\prep_ori"
gt_folder = "C:\\Users\\Admin\\Documents\\SKRIPSI\\DATASET\\PREPROCESSING\\256\\prep_gt"

# Fungsi untuk mendapatkan informasi gambar
def get_image_info(folder_path):
    """Summarize the images stored directly inside a folder.

    Only regular files are opened. Subdirectories (for example a
    `.ipynb_checkpoints` folder) are skipped instead of being handed to
    Image.open, where they would raise.

    NOTE: this definition replaces the per-file `get_image_info(image_path)`
    defined in an earlier cell; once this cell has run, the name refers to
    this folder-level version.

    Args:
        folder_path: Folder whose files are inspected (not recursive).

    Returns:
        dict with
        - "total_images": number of files inspected,
        - "resolution": set of (width, height) sizes found,
        - "color_mode": set of PIL modes found,
        - "file_format": set of PIL format names found.
    """
    image_files = [
        name for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    ]

    resolutions = set()
    color_modes = set()
    formats = set()

    for image_file in image_files:
        with Image.open(os.path.join(folder_path, image_file)) as img:
            resolutions.add(img.size)
            color_modes.add(img.mode)
            formats.add(img.format)

    return {
        "total_images": len(image_files),
        "resolution": resolutions,
        "color_mode": color_modes,
        "file_format": formats
    }

# Summary of the preprocessed original images.
ori_info = get_image_info(ori_folder)
print(f"\nJumlah data di folder original: {ori_info['total_images']}")
print(f"Resolusi gambar di folder original: {ori_info['resolution']}")
print(f"Format warna gambar di folder original: {ori_info['color_mode']}")
print(f"Format file di folder original: {ori_info['file_format']}")

# Summary of the preprocessed ground-truth images.
gt_info = get_image_info(gt_folder)
print(f"\nJumlah data di folder ground_truth: {gt_info['total_images']}")
print(f"Resolusi gambar di folder ground_truth: {gt_info['resolution']}")
print(f"Format warna gambar di folder ground_truth: {gt_info['color_mode']}")
print(f"Format file di folder ground_truth: {gt_info['file_format']}")


Jumlah data di folder original: 115
Resolusi gambar di folder original: {(256, 256)}
Format warna gambar di folder original: {'RGB'}
Format file di folder original: {'PNG'}

Jumlah data di folder ground_truth: 115
Resolusi gambar di folder ground_truth: {(256, 256)}
Format warna gambar di folder ground_truth: {'RGB'}
Format file di folder ground_truth: {'PNG'}
