# Original

In [1]:
import os
# Show the kernel's current working directory (rendered as the cell output).
os.getcwd()

'C:\\Users\\Admin\\Documents\\SKRIPSI\\Kode\\Model'

In [2]:
import os
from PIL import Image
from collections import Counter

# Base dataset folder, plus the two input folders beneath it:
# source images (prep_ori) and ground-truth images (prep_gt).
path = r'C:\Users\Admin\Documents\SKRIPSI\DATASET\PREPROCESSING\256'
source_ori, source_gt = (path + suffix for suffix in ('\\prep_ori', '\\prep_gt'))

def analyze_images(folder_path):
    """Summarise the image files located directly inside ``folder_path``.

    Only entries whose name ends in .png, .jpg or .jpeg (case-insensitive)
    are opened with ``Image.open``; every other entry is ignored. The scan
    is not recursive.

    Args:
        folder_path: Directory to scan.

    Returns:
        dict with the keys
        "Total Gambar" (number of images found),
        "Rata-rata Ukuran Gambar" ((mean width, mean height)),
        "Rata-rata Total Piksel" (mean of width * height),
        "Mode Warna Paling Sering" (most frequent ``img.mode``),
        "Format File" (most frequent ``img.format``).
        When no image is found the averages are 0 / (0, 0) and the two
        "most frequent" entries are None.
    """
    total_images = 0
    total_width = 0
    total_height = 0
    total_pixels = 0
    color_modes = []
    file_formats = []

    # Sorted so that ties between equally frequent modes/formats are resolved
    # the same way on every run, independent of the OS listing order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        total_images += 1
        with Image.open(os.path.join(folder_path, filename)) as img:
            width, height = img.size
            total_width += width
            total_height += height
            total_pixels += width * height
            color_modes.append(img.mode)
            file_formats.append(img.format)

    if total_images > 0:
        # True division: floor division would truncate the mean (e.g. 255.5 -> 255)
        # and hide size differences between the folders being compared.
        avg_size = (total_width / total_images, total_height / total_images)
        avg_total_pixels = total_pixels / total_images
        most_common_color_mode = Counter(color_modes).most_common(1)[0][0]
        most_common_format = Counter(file_formats).most_common(1)[0][0]
    else:
        avg_size = (0, 0)
        avg_total_pixels = 0
        most_common_color_mode = None
        most_common_format = None

    return {
        "Total Gambar": total_images,
        "Rata-rata Ukuran Gambar": avg_size,
        "Rata-rata Total Piksel": avg_total_pixels,
        "Mode Warna Paling Sering": most_common_color_mode,
        "Format File": most_common_format
    }

# Gather the statistics of both folders
original_stats = analyze_images(source_ori)
ground_truth_stats = analyze_images(source_gt)
results = {"Original": original_stats, "Ground Truth": ground_truth_stats}

# Print one report per folder
for folder_name, info in results.items():
    report_lines = [
        f"Hasil Analisis untuk Folder {folder_name}:",
        f"Total Gambar: {info['Total Gambar']}",
        f"Rata-rata Ukuran Gambar: ({info['Rata-rata Ukuran Gambar'][0]:.2f} x {info['Rata-rata Ukuran Gambar'][1]:.2f})",
        f"Rata-rata Total Piksel: {info['Rata-rata Total Piksel']:.2f}",
        f"Mode Warna Paling Sering: {info['Mode Warna Paling Sering']}",
        f"Format File: {info['Format File']}\n",
    ]
    print("\n".join(report_lines))

# Each statistic must be identical for the two folders; checked in this order:
# image count, average size, average pixel count, dominant color mode, dominant file format.
checks = (
    ("Total Gambar", "Jumlah gambar di folder ORI dan GT tidak sama"),
    ("Rata-rata Ukuran Gambar", "Rata-rata ukuran gambar di folder ORI dan GT tidak sama"),
    ("Rata-rata Total Piksel", "Rata-rata total piksel di folder ORI dan GT tidak sama"),
    ("Mode Warna Paling Sering", "Mode warna paling sering di folder ORI dan GT tidak sama"),
    ("Format File", "Format file di folder ORI dan GT tidak sama"),
)
for key, message in checks:
    assert results["Original"][key] == results["Ground Truth"][key], message

print("\nSemua pengujian berhasil! Folder original dan ground_truth memiliki data gambar yang sama.")

Hasil Analisis untuk Folder Original:
Total Gambar: 115
Rata-rata Ukuran Gambar: (256.00 x 256.00)
Rata-rata Total Piksel: 65536.00
Mode Warna Paling Sering: RGB
Format File: PNG

Hasil Analisis untuk Folder Ground Truth:
Total Gambar: 115
Rata-rata Ukuran Gambar: (256.00 x 256.00)
Rata-rata Total Piksel: 65536.00
Mode Warna Paling Sering: RGB
Format File: PNG


Semua pengujian berhasil! Folder original dan ground_truth memiliki data gambar yang sama.


In [3]:
from sklearn.model_selection import train_test_split
import shutil

# Destination folders for the train, validation and test subsets (under the base path)
train_dir = path + '\\split\\original\\train'
val_dir = path + '\\split\\original\\val'
test_dir = path + '\\split\\original\\test'

# Create the 'original' and 'ground_truth' subfolders of every subset
for split_dir in (train_dir, val_dir, test_dir):
    for subfolder in ('original', 'ground_truth'):
        os.makedirs(os.path.join(split_dir, subfolder), exist_ok=True)

# Collect the image and mask file names. Only image files are kept (the same
# extensions analyze_images counts), so a stray entry such as a thumbnail
# cache or a subfolder cannot shift the position-based pairing below.
image_extensions = ('.png', '.jpg', '.jpeg')
original_files = sorted(
    name for name in os.listdir(source_ori) if name.lower().endswith(image_extensions))
ground_truth_files = sorted(
    name for name in os.listdir(source_gt) if name.lower().endswith(image_extensions))

# Images and masks are paired by their position in the sorted listings, so the
# counts must match. Fail here with a clear message instead of letting
# train_test_split raise on inconsistent input lengths.
if len(original_files) != len(ground_truth_files):
    raise ValueError(
        f"Jumlah gambar asli ({len(original_files)}) tidak sama dengan jumlah mask ({len(ground_truth_files)})")
print(f"Jumlah gambar dan mask sesuai: {len(original_files)} file")

# Dataset split ratios
train_ratio = 0.8  # 80% for train
val_ratio = 0.1    # 10% for validation
test_ratio = 0.1   # 10% for test

# The ratios must add up to 1.0. Compared with a tolerance because sums of
# decimal fractions (e.g. 0.7 + 0.2 + 0.1) are not exactly 1.0 in binary floating point.
if abs((train_ratio + val_ratio + test_ratio) - 1.0) > 1e-9:
    raise ValueError("Rasio train, validation, dan test harus berjumlah 1.0")

# First split: train versus the remainder (validation + test)
train_original, temp_original, train_ground_truth, temp_ground_truth = train_test_split(
    original_files, ground_truth_files, test_size=(1.0 - train_ratio), random_state=42)

val_size = val_ratio / (val_ratio + test_ratio)  # share of validation within (validation + test)

# Second split: divide the remainder into validation and test
val_original, test_original, val_ground_truth, test_ground_truth = train_test_split(
    temp_original, temp_ground_truth, test_size=(1.0 - val_size), random_state=42)

# Helper that copies paired files into the right destination directory
def copy_files(original_list, ground_truth_list, folder):
    """Copy paired image / ground-truth files into ``folder``.

    Files are read from the module-level ``source_ori`` and ``source_gt``
    directories and written to ``folder/original`` and
    ``folder/ground_truth`` respectively (both are created if missing).

    Args:
        original_list: File names inside ``source_ori`` to copy.
        ground_truth_list: File names inside ``source_gt``, paired by
            position with ``original_list``.
        folder: Destination directory of the subset.

    Returns:
        None. When either list is empty an error line is printed and
        nothing is copied.

    Raises:
        ValueError: If the two lists differ in length; zip() would otherwise
            silently drop the unpaired tail while the summary line still
            reported ``len(original_list)`` files.
    """
    if not original_list or not ground_truth_list:
        print(f"ERROR: Tidak ada file untuk diproses di {folder}")
        return

    if len(original_list) != len(ground_truth_list):
        raise ValueError(
            f"Jumlah file original ({len(original_list)}) dan ground truth "
            f"({len(ground_truth_list)}) tidak sama untuk {folder}")

    original_dst = os.path.join(folder, 'original')
    ground_truth_dst = os.path.join(folder, 'ground_truth')
    # Make the helper usable even if the caller has not prepared the subfolders.
    os.makedirs(original_dst, exist_ok=True)
    os.makedirs(ground_truth_dst, exist_ok=True)

    for orig, gt in zip(original_list, ground_truth_list):
        shutil.copy(os.path.join(source_ori, orig), os.path.join(original_dst, orig))
        shutil.copy(os.path.join(source_gt, gt), os.path.join(ground_truth_dst, gt))

    print(f"{len(original_list)} file berhasil disalin ke {folder}")

# Copy every subset into its own destination folder (train, then val, then test)
for subset_original, subset_ground_truth, subset_dir in (
    (train_original, train_ground_truth, train_dir),
    (val_original, val_ground_truth, val_dir),
    (test_original, test_ground_truth, test_dir),
):
    copy_files(subset_original, subset_ground_truth, subset_dir)

Jumlah gambar dan mask sesuai: 115 file
92 file berhasil disalin ke C:\Users\Admin\Documents\SKRIPSI\DATASET\PREPROCESSING\256\split\original\train
11 file berhasil disalin ke C:\Users\Admin\Documents\SKRIPSI\DATASET\PREPROCESSING\256\split\original\val
12 file berhasil disalin ke C:\Users\Admin\Documents\SKRIPSI\DATASET\PREPROCESSING\256\split\original\test


# Augmentasi

In [None]:
import os
from PIL import Image
from collections import Counter

# Augmentation folder and its two input folders (images and ground truth).
# Note: this rebinds path / source_ori / source_gt, the same names the cells
# of the "Original" section use.
path = r'C:\Users\Admin\Documents\SKRIPSI\DATASET\PREPROCESSING\256\augmentation'
source_ori, source_gt = (path + suffix for suffix in ('\\original', '\\ground_truth'))

def analyze_images(folder_path):
    """Summarise the image files located directly inside ``folder_path``.

    Only entries whose name ends in .png, .jpg or .jpeg (case-insensitive)
    are opened with ``Image.open``; every other entry is ignored. The scan
    is not recursive.

    Args:
        folder_path: Directory to scan.

    Returns:
        dict with the keys
        "Total Gambar" (number of images found),
        "Rata-rata Ukuran Gambar" ((mean width, mean height)),
        "Rata-rata Total Piksel" (mean of width * height),
        "Mode Warna Paling Sering" (most frequent ``img.mode``),
        "Format File" (most frequent ``img.format``).
        When no image is found the averages are 0 / (0, 0) and the two
        "most frequent" entries are None.
    """
    total_images = 0
    total_width = 0
    total_height = 0
    total_pixels = 0
    color_modes = []
    file_formats = []

    # Sorted so that ties between equally frequent modes/formats are resolved
    # the same way on every run, independent of the OS listing order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        total_images += 1
        with Image.open(os.path.join(folder_path, filename)) as img:
            width, height = img.size
            total_width += width
            total_height += height
            total_pixels += width * height
            color_modes.append(img.mode)
            file_formats.append(img.format)

    if total_images > 0:
        # True division: floor division would truncate the mean (e.g. 255.5 -> 255)
        # and hide size differences between the folders being compared.
        avg_size = (total_width / total_images, total_height / total_images)
        avg_total_pixels = total_pixels / total_images
        most_common_color_mode = Counter(color_modes).most_common(1)[0][0]
        most_common_format = Counter(file_formats).most_common(1)[0][0]
    else:
        avg_size = (0, 0)
        avg_total_pixels = 0
        most_common_color_mode = None
        most_common_format = None

    return {
        "Total Gambar": total_images,
        "Rata-rata Ukuran Gambar": avg_size,
        "Rata-rata Total Piksel": avg_total_pixels,
        "Mode Warna Paling Sering": most_common_color_mode,
        "Format File": most_common_format
    }

# Gather the statistics of both folders
original_stats = analyze_images(source_ori)
ground_truth_stats = analyze_images(source_gt)
results = {"Original": original_stats, "Ground Truth": ground_truth_stats}

# Print one report per folder
for folder_name, info in results.items():
    report_lines = [
        f"Hasil Analisis untuk Folder {folder_name}:",
        f"Total Gambar: {info['Total Gambar']}",
        f"Rata-rata Ukuran Gambar: ({info['Rata-rata Ukuran Gambar'][0]:.2f} x {info['Rata-rata Ukuran Gambar'][1]:.2f})",
        f"Rata-rata Total Piksel: {info['Rata-rata Total Piksel']:.2f}",
        f"Mode Warna Paling Sering: {info['Mode Warna Paling Sering']}",
        f"Format File: {info['Format File']}\n",
    ]
    print("\n".join(report_lines))

# Each statistic must be identical for the two folders; checked in this order:
# image count, average size, average pixel count, dominant color mode, dominant file format.
checks = (
    ("Total Gambar", "Jumlah gambar di folder ORI dan GT tidak sama"),
    ("Rata-rata Ukuran Gambar", "Rata-rata ukuran gambar di folder ORI dan GT tidak sama"),
    ("Rata-rata Total Piksel", "Rata-rata total piksel di folder ORI dan GT tidak sama"),
    ("Mode Warna Paling Sering", "Mode warna paling sering di folder ORI dan GT tidak sama"),
    ("Format File", "Format file di folder ORI dan GT tidak sama"),
)
for key, message in checks:
    assert results["Original"][key] == results["Ground Truth"][key], message

print("\nSemua pengujian berhasil! Folder original dan ground_truth memiliki data gambar yang sama.")

In [None]:
from sklearn.model_selection import train_test_split
import shutil

# Destination folders for the train, validation and test subsets (under the base path)
path = 'C:\\Users\\Admin\\Documents\\SKRIPSI\\DATASET\\PREPROCESSING\\256'
train_dir = path + '\\split\\augmentasi\\train'
val_dir = path + '\\split\\augmentasi\\val'
test_dir = path + '\\split\\augmentasi\\test'

# Bind the source folders explicitly. `path` is rebound above, and without
# these two lines the cell copies from whatever source_ori / source_gt an
# earlier cell happened to leave in the kernel.
source_ori = path + '\\augmentation\\original'
source_gt = path + '\\augmentation\\ground_truth'

# Create the 'original' and 'ground_truth' subfolders of every subset
for split_dir in (train_dir, val_dir, test_dir):
    for subfolder in ('original', 'ground_truth'):
        os.makedirs(os.path.join(split_dir, subfolder), exist_ok=True)

# Collect the image and mask file names. Only image files are kept (the same
# extensions analyze_images counts), so a stray entry such as a thumbnail
# cache or a subfolder cannot shift the position-based pairing below.
image_extensions = ('.png', '.jpg', '.jpeg')
original_files = sorted(
    name for name in os.listdir(source_ori) if name.lower().endswith(image_extensions))
ground_truth_files = sorted(
    name for name in os.listdir(source_gt) if name.lower().endswith(image_extensions))

# Images and masks are paired by their position in the sorted listings, so the
# counts must match. Fail here with a clear message instead of letting
# train_test_split raise on inconsistent input lengths.
if len(original_files) != len(ground_truth_files):
    raise ValueError(
        f"Jumlah gambar asli ({len(original_files)}) tidak sama dengan jumlah mask ({len(ground_truth_files)})")
print(f"Jumlah gambar dan mask sesuai: {len(original_files)} file")

# Dataset split ratios
train_ratio = 0.8  # 80% for train
val_ratio = 0.1    # 10% for validation
test_ratio = 0.1   # 10% for test

# The ratios must add up to 1.0. Compared with a tolerance because sums of
# decimal fractions (e.g. 0.7 + 0.2 + 0.1) are not exactly 1.0 in binary floating point.
if abs((train_ratio + val_ratio + test_ratio) - 1.0) > 1e-9:
    raise ValueError("Rasio train, validation, dan test harus berjumlah 1.0")

# NOTE(review): if the augmentation folder holds several variants derived from
# the same source image, a purely random split can put variants of one image
# into both train and test - confirm this is intended.

# First split: train versus the remainder (validation + test)
train_original, temp_original, train_ground_truth, temp_ground_truth = train_test_split(
    original_files, ground_truth_files, test_size=(1.0 - train_ratio), random_state=42)

val_size = val_ratio / (val_ratio + test_ratio)  # share of validation within (validation + test)

# Second split: divide the remainder into validation and test
val_original, test_original, val_ground_truth, test_ground_truth = train_test_split(
    temp_original, temp_ground_truth, test_size=(1.0 - val_size), random_state=42)

# Helper that copies paired files into the right destination directory
def copy_files(original_list, ground_truth_list, folder):
    """Copy paired image / ground-truth files into ``folder``.

    Files are read from the module-level ``source_ori`` and ``source_gt``
    directories and written to ``folder/original`` and
    ``folder/ground_truth`` respectively (both are created if missing).

    Args:
        original_list: File names inside ``source_ori`` to copy.
        ground_truth_list: File names inside ``source_gt``, paired by
            position with ``original_list``.
        folder: Destination directory of the subset.

    Returns:
        None. When either list is empty an error line is printed and
        nothing is copied.

    Raises:
        ValueError: If the two lists differ in length; zip() would otherwise
            silently drop the unpaired tail while the summary line still
            reported ``len(original_list)`` files.
    """
    if not original_list or not ground_truth_list:
        print(f"ERROR: Tidak ada file untuk diproses di {folder}")
        return

    if len(original_list) != len(ground_truth_list):
        raise ValueError(
            f"Jumlah file original ({len(original_list)}) dan ground truth "
            f"({len(ground_truth_list)}) tidak sama untuk {folder}")

    original_dst = os.path.join(folder, 'original')
    ground_truth_dst = os.path.join(folder, 'ground_truth')
    # Make the helper usable even if the caller has not prepared the subfolders.
    os.makedirs(original_dst, exist_ok=True)
    os.makedirs(ground_truth_dst, exist_ok=True)

    for orig, gt in zip(original_list, ground_truth_list):
        shutil.copy(os.path.join(source_ori, orig), os.path.join(original_dst, orig))
        shutil.copy(os.path.join(source_gt, gt), os.path.join(ground_truth_dst, gt))

    print(f"{len(original_list)} file berhasil disalin ke {folder}")

# Copy every subset into its own destination folder (train, then val, then test)
for subset_original, subset_ground_truth, subset_dir in (
    (train_original, train_ground_truth, train_dir),
    (val_original, val_ground_truth, val_dir),
    (test_original, test_ground_truth, test_dir),
):
    copy_files(subset_original, subset_ground_truth, subset_dir)