In [None]:
import os
import rasterio
import numpy as np
import cv2  # Mengimpor OpenCV untuk interpolasi
from scipy.ndimage import generic_filter

def check_and_interpolate_nans(data, band_idx):
    """
    Detect NaN pixels in a 2-D band and fill them from the surrounding valid pixels.

    The previous implementation replaced NaN with 0 and called ``cv2.resize``
    with the image's own size, which is an identity resample: every NaN simply
    became 0. This version performs a real interpolation:

    * a nearest-neighbour fill guarantees every gap receives a finite value;
    * a cubic (Clough-Tocher) interpolation overrides that value wherever it
      can be evaluated (inside the convex hull of the supporting pixels).

    Args:
        data: 2-D float array; it is modified in place.
        band_idx: zero-based band index, used only in the printed messages.

    Returns:
        The same array object, with NaN pixels replaced. If the band holds no
        valid pixel at all, the gaps are set to 0 (nothing to interpolate from).
    """
    # Function-scope imports keep this block self-contained.
    from scipy.interpolate import griddata
    from scipy.ndimage import binary_dilation

    nan_mask = np.isnan(data)
    nan_count = int(nan_mask.sum())
    if nan_count == 0:
        return data

    print(f"Band {band_idx + 1} mengandung NaN!")
    print(f"Jumlah NaN di band {band_idx + 1}: {nan_count}")

    valid_mask = ~nan_mask
    if not valid_mask.any():
        # No valid sample exists, so fall back to a plain zero fill.
        data[nan_mask] = 0
        return data

    # Only valid pixels close to a gap influence the result; restricting the
    # sample set to a 3-pixel band keeps the triangulation small on big rasters.
    support_mask = binary_dilation(nan_mask, iterations=3) & valid_mask
    support_points = np.argwhere(support_mask)
    support_values = data[support_mask].astype(np.float64)
    gap_points = np.argwhere(nan_mask)

    # Nearest-neighbour values are always defined and act as the safety net.
    filled = griddata(support_points, support_values, gap_points, method="nearest")

    if len(support_points) >= 4:
        try:
            cubic = griddata(support_points, support_values, gap_points, method="cubic")
        except (RuntimeError, ValueError):
            # Degenerate geometry (e.g. collinear samples) makes the
            # triangulation fail; keep the nearest-neighbour values.
            cubic = None
        if cubic is not None:
            # Cubic output is NaN outside the convex hull of the samples.
            usable = np.isfinite(cubic)
            filled[usable] = cubic[usable]

    # argwhere and boolean indexing both walk the array in C order, so the
    # filled values line up with the masked positions.
    data[nan_mask] = filled
    return data

def process_tif(input_tif, output_tif):
    """
    Read a .tif file, run the NaN check/fill on every band, and write a new file.

    Args:
        input_tif: path of the raster to read.
        output_tif: path of the raster to write; it reuses the source profile.
    """
    with rasterio.open(input_tif) as source:
        # All bands come back as one array indexed by band first.
        raster = source.read()
        out_profile = source.profile

        # Each band is handed to check_and_interpolate_nans with its 0-based index.
        for band_number, band_pixels in enumerate(raster):
            raster[band_number] = check_and_interpolate_nans(band_pixels, band_number)

        # The output is written while the source dataset is still open.
        with rasterio.open(output_tif, 'w', **out_profile) as sink:
            sink.write(raster)

        print(f"Proses selesai. File output disimpan di: {output_tif}")

def process_folder(input_folder, output_folder):
    """
    Run process_tif on every ``.tif`` file located directly in input_folder.

    Each result is written to output_folder under the same file name as its
    input (no suffix is added).
    """
    tif_names = (name for name in os.listdir(input_folder) if name.endswith('.tif'))
    for tif_name in tif_names:
        input_tif = os.path.join(input_folder, tif_name)
        output_tif = os.path.join(output_folder, tif_name)

        print(f"Memproses file: {input_tif}")
        process_tif(input_tif, output_tif)

# Input and output directories
input_folder_1 = 'data/test_images'
output_folder_1 = 'data_3/test_images_interpolated/'
input_folder_2 = 'data/train_images'
output_folder_2 = 'data_3/train_images_interpolated/'

# Create any missing output folder before processing starts
for _out_dir in (output_folder_1, output_folder_2):
    if not os.path.exists(_out_dir):
        os.makedirs(_out_dir)

# Process the test images first, then the train images
for _in_dir, _out_dir in (
    (input_folder_1, output_folder_1),
    (input_folder_2, output_folder_2),
):
    process_folder(_in_dir, _out_dir)


In [None]:
import os
import rasterio
import numpy as np
import cv2  # OpenCV untuk interpolasi
from rasterio.enums import Resampling

def resize_image_to_fixed_size(input_tif, output_tif, target_size=(512, 512)):
    """
    Resample every band of a .tif file to a fixed pixel size and save the result.

    Args:
        input_tif: path of the raster to read.
        output_tif: path of the raster to write.
        target_size: unpacked as (width, height) of the output in pixels.
    """
    with rasterio.open(input_tif) as source:
        raster = source.read()
        out_profile = source.profile

        target_width, target_height = target_size

        # Bicubic resampling per band, restacked as (bands, height, width).
        resized_bands = np.array([
            cv2.resize(layer, (target_width, target_height), interpolation=cv2.INTER_CUBIC)
            for layer in raster
        ])

        # Scale the affine transform by the old/new size ratio so the
        # georeferencing matches the new grid dimensions.
        x_factor = source.width / target_width
        y_factor = source.height / target_height
        out_profile.update(
            height=target_height,
            width=target_width,
            transform=source.transform * source.transform.scale(x_factor, y_factor)
        )

        with rasterio.open(output_tif, 'w', **out_profile) as sink:
            sink.write(resized_bands)

        print(f"Proses selesai. File output disimpan di: {output_tif}")

def process_folder(input_folder, output_folder, target_size=(512, 512)):
    """
    Resize every ``.tif`` file located directly in input_folder to target_size.

    Each result is written to output_folder under the same file name as its
    input (no suffix is added).
    """
    tif_names = (name for name in os.listdir(input_folder) if name.endswith('.tif'))
    for tif_name in tif_names:
        input_tif = os.path.join(input_folder, tif_name)
        output_tif = os.path.join(output_folder, tif_name)

        print(f"Memproses file: {input_tif}")
        resize_image_to_fixed_size(input_tif, output_tif, target_size)

# Input and output directories
input_folder_1 = 'data_3/test_images_interpolated/'
output_folder_1 = 'data_3/test/test_images_resized/'
input_folder_2 = 'data_3/train_images_interpolated/'
output_folder_2 = 'data_3/train/train_images_resized/'

# Create any missing output folder before processing starts
for _out_dir in (output_folder_1, output_folder_2):
    if not os.path.exists(_out_dir):
        os.makedirs(_out_dir)

# Resize the test images first, then the train images
for _in_dir, _out_dir in (
    (input_folder_1, output_folder_1),
    (input_folder_2, output_folder_2),
):
    process_folder(_in_dir, _out_dir, target_size=(512, 512))


In [None]:
import os
import rasterio
import numpy as np

def extract_bands_from_folder(input_folder, output_folder, bands_to_extract):
    """
    Copy a chosen subset of bands from every .tif file in a folder.

    Parameters:
        input_folder (str): folder containing the source .tif files.
        output_folder (str): folder that receives the reduced .tif files
            (created if missing); file names are kept.
        bands_to_extract (list): 1-based band indexes, written to the output
            in the order given.

    Raises:
        ValueError: if a file has fewer bands than the highest requested index.
    """
    os.makedirs(output_folder, exist_ok=True)

    for entry in os.listdir(input_folder):
        # Anything that is not a .tif file is ignored.
        if not entry.endswith('.tif'):
            continue

        source_path = os.path.join(input_folder, entry)
        target_path = os.path.join(output_folder, entry)

        with rasterio.open(source_path) as source:
            highest_requested = max(bands_to_extract)
            if highest_requested > source.count:
                raise ValueError(f"File {entry} tidak memiliki semua band yang diminta.")

            selected = [source.read(index) for index in bands_to_extract]

            # Reuse the source metadata, changing only the band count.
            out_profile = source.profile
            out_profile.update(count=len(bands_to_extract))

            with rasterio.open(target_path, 'w', **out_profile) as sink:
                for position, pixels in enumerate(selected, start=1):
                    sink.write(pixels, position)

        print(f"Processed {entry} -> {target_path}")

# Input and output folders
input_folder_1 = 'data_3/test/test_images_resized/'
output_folder_1 = 'data_3/test/test_images_resized_3band/'
input_folder_2 = 'data_3/train/train_images_resized/'
output_folder_2 = 'data_3/train/train_images_resized_3band/'

# Bands to extract (1-based indexes), in output order
bands = [4, 3, 2]

# Process the test folder first, then the train folder
for _in_dir, _out_dir in (
    (input_folder_1, output_folder_1),
    (input_folder_2, output_folder_2),
):
    extract_bands_from_folder(_in_dir, _out_dir, bands)


In [None]:
import os
import rasterio
import numpy as np
from skimage.exposure import equalize_hist

def min_max_scaler(data, new_min=0, new_max=1):
    """
    Linearly rescale data so its minimum maps to new_min and its maximum to new_max.

    Non-floating input is converted to float64 first. This avoids two problems
    with integer arrays: the range ``max - min`` overflowing for narrow signed
    dtypes, and a fractional ``new_min`` being truncated when the input is
    constant. Floating input keeps its dtype.

    Args:
        data: array-like of numbers.
        new_min: lower bound of the target range.
        new_max: upper bound of the target range.

    Returns:
        Floating-point array with the same shape as data. If every value is
        identical, the result is filled with new_min.
    """
    values = np.asarray(data)
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype(np.float64)

    old_min = np.min(values)
    old_max = np.max(values)
    value_range = old_max - old_min

    if value_range == 0:
        # Constant input carries no contrast to stretch.
        return np.full_like(values, new_min)

    scaled_data = (values - old_min) / value_range  # now in [0, 1]
    scaled_data = scaled_data * (new_max - new_min) + new_min
    return scaled_data

def needs_histogram_equalization(data, threshold=0.8):
    """
    Decide whether histogram equalization is worthwhile for data scaled to [0, 1].

    The decision uses the Shannon entropy of a 256-bin histogram, normalised by
    the maximum possible entropy (log2 of the bin count). A nearly flat
    histogram has normalised entropy close to 1 and needs no equalization.

    The entropy is computed from bin probabilities (counts / total). Using
    ``density=True`` values instead, which sum to the bin count rather than 1,
    yields a value that is never positive, so the check would always be True.

    Args:
        data: array of values; only those inside [0, 1] are counted.
        threshold: normalised-entropy level below which equalization is advised.

    Returns:
        bool: True when the normalised entropy is below threshold. False when
        no value falls inside [0, 1].
    """
    counts, _ = np.histogram(data, bins=256, range=(0, 1))
    total = counts.sum()
    if total == 0:
        # Nothing to measure, so there is nothing to equalize.
        return False

    # Empty bins contribute 0 to the entropy; drop them to avoid log2(0).
    probabilities = counts[counts > 0] / total
    hist_entropy = -np.sum(probabilities * np.log2(probabilities))
    max_entropy = np.log2(counts.size)
    normalized_entropy = hist_entropy / max_entropy

    return bool(normalized_entropy < threshold)

def apply_histogram_equalization(data):
    """
    Return the histogram-equalized version of data.

    Thin wrapper that delegates to skimage.exposure.equalize_hist.
    """
    equalized = equalize_hist(data)
    return equalized

def process_tif(input_tif, output_tif):
    """
    Normalise every band of a .tif file to [0, 1], equalizing its histogram when needed.

    Results are collected in a floating-point array and the output profile's
    dtype is updated to match. Writing the scaled values back into the
    source-dtype array would truncate them to 0/1 for integer rasters.
    Floating-point sources keep their dtype; any other source is written as
    float32.

    Args:
        input_tif: path of the raster to read.
        output_tif: path of the raster to write.
    """
    with rasterio.open(input_tif) as src:
        bands = src.read()
        profile = src.profile

    if np.issubdtype(bands.dtype, np.floating):
        out_dtype = bands.dtype
    else:
        out_dtype = np.dtype("float32")
    processed = np.empty(bands.shape, dtype=out_dtype)

    for i, band in enumerate(bands):
        # Min-max scaling to [0, 1]
        scaled_band = min_max_scaler(band, new_min=0, new_max=1)

        # Equalize only when the check asks for it
        if needs_histogram_equalization(scaled_band):
            print(f"Band {i+1}: Applying histogram equalization.")
            scaled_band = apply_histogram_equalization(scaled_band)
        else:
            print(f"Band {i+1}: Histogram equalization not needed.")

        processed[i] = scaled_band

    # The file's declared dtype must match the array being written.
    profile.update(dtype=out_dtype.name)
    with rasterio.open(output_tif, 'w', **profile) as dst:
        dst.write(processed)

    print(f"Processing complete. Output saved to: {output_tif}")

def process_folder(input_folder, output_folder):
    """
    Run process_tif (normalisation / equalization) on every .tif file in a folder.

    Outputs keep the input file name. Later steps in this file pair images with
    masks by base name (masks are saved as ``<image name>.npy``), so adding a
    suffix such as ``_processed`` would leave no matching image/mask pair.

    Args:
        input_folder: folder containing the source .tif files.
        output_folder: existing folder that receives the processed files.
    """
    for filename in os.listdir(input_folder):
        if not filename.endswith('.tif'):
            continue

        input_tif = os.path.join(input_folder, filename)
        output_tif = os.path.join(output_folder, filename)

        print(f"Processing file: {input_tif}")
        process_tif(input_tif, output_tif)

# Input and output directories
input_folder_1 = 'data_3/test/test_images_resized_3band/'
output_folder_1 = 'data_3/test/test_images_norm/'
input_folder_2 = 'data_3/train/train_images_resized_3band/'
output_folder_2 = 'data_3/train/train_images_norm/'

# Create any missing output folder before processing starts
for _out_dir in (output_folder_1, output_folder_2):
    if not os.path.exists(_out_dir):
        os.makedirs(_out_dir)

# Normalise the test images first, then the train images
for _in_dir, _out_dir in (
    (input_folder_1, output_folder_1),
    (input_folder_2, output_folder_2),
):
    process_folder(_in_dir, _out_dir)


In [None]:
import os
import json
import numpy as np
import rasterio
import cv2  # Pastikan OpenCV terinstal

def create_masks_with_edges(annotations_file, images_dir, output_dir, resized_output_dir, target_size=(512, 512)):
    """
    Build 3-class label masks (0 = background, 1 = field, 2 = edge) from an annotation file.

    For every image listed in the annotations that also exists in images_dir,
    one mask is saved at the image's original size and one resized copy is
    saved at target_size. Both are .npy files named after the image.

    Args:
        annotations_file (str): path to the JSON annotation file.
        images_dir (str): folder holding the original .tif images.
        output_dir (str): folder for the original-size masks.
        resized_output_dir (str): folder for the resized masks.
        target_size (tuple): (height, width) of the resized mask.
    """
    for folder in (output_dir, resized_output_dir):
        os.makedirs(folder, exist_ok=True)

    with open(annotations_file, 'r') as handle:
        annotation_index = json.load(handle)

    # Set membership makes the per-image existence test cheap.
    available_images = set(os.listdir(images_dir))

    for record in annotation_index['images']:
        tif_name = record['file_name']

        if tif_name not in available_images:
            print(f"Warning: File gambar {tif_name} tidak ditemukan di {images_dir}. Melewati...")
            continue

        polygon_records = record['annotations']

        # The raster is opened only to learn its pixel dimensions.
        with rasterio.open(os.path.join(images_dir, tif_name)) as source:
            n_rows, n_cols = source.height, source.width

        label_map = np.zeros((n_rows, n_cols), dtype=np.uint8)

        # Fill every annotated polygon with the field label.
        # NOTE(review): reshape((-1, 2)) assumes each 'segmentation' is one
        # polygon given as x, y pairs - confirm against the annotation schema.
        for item in polygon_records:
            vertices = np.array(item['segmentation']).reshape((-1, 2)).astype(np.int32)
            cv2.fillPoly(label_map, [vertices], 1)

        # Trace the outer contours of the filled area on a scratch canvas,
        # then stamp those pixels into the mask as the edge class.
        field_pixels = (label_map == 1).astype(np.uint8)
        outlines, _ = cv2.findContours(field_pixels, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        outline_canvas = np.zeros_like(label_map, dtype=np.uint8)
        cv2.drawContours(outline_canvas, outlines, -1, 1, thickness=2)
        label_map[outline_canvas == 1] = 2

        npy_name = tif_name.replace(".tif", ".npy")

        # Original-size mask
        np.save(os.path.join(output_dir, npy_name), label_map)

        # cv2.resize expects (width, height); nearest-neighbour keeps labels discrete.
        shrunk = cv2.resize(label_map, target_size[::-1], interpolation=cv2.INTER_NEAREST)
        np.save(os.path.join(resized_output_dir, npy_name), shrunk)

        print(f"Masker untuk {tif_name} berhasil dibuat dan diresize.")

# Build the masks for the train annotations, then for the test annotations
for _mask_job in (
    {
        "annotations_file": "data/train_annotations.json",
        "images_dir": "data/train_images/",
        "output_dir": "data_3/train/masks/original",  # original-size masks
        "resized_output_dir": "data_3/train/masks/resized",  # resized masks
        "target_size": (512, 512),
    },
    {
        "annotations_file": "data/test_annotations.json",
        "images_dir": "data/test_images/",
        "output_dir": "data_3/test/masks/original",  # original-size masks
        "resized_output_dir": "data_3/test/masks/resized",  # resized masks
        "target_size": (512, 512),
    },
):
    create_masks_with_edges(**_mask_job)


In [None]:
import os
import numpy as np
import rasterio
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

def visualize_all_masks_with_images(image_dir, mask_dir):
    """
    Show every image next to the same image overlaid with its 3-class mask.

    Images (.tif) and masks (.npy) are paired by file name without extension.
    The mask is drawn with a fixed colour scale (vmin=0, vmax=2) so each label
    always gets the same colour. With imshow's default autoscaling, a mask that
    lacks one of the classes would have its labels mapped to the wrong colours.

    Args:
        image_dir (str): folder holding the .tif images.
        mask_dir (str): folder holding the .npy masks.
    """
    image_files = [f for f in os.listdir(image_dir) if f.endswith(".tif")]
    mask_files = [f for f in os.listdir(mask_dir) if f.endswith(".npy")]

    # Base names without extension
    image_names = set(os.path.splitext(f)[0] for f in image_files)
    mask_names = set(os.path.splitext(f)[0] for f in mask_files)

    # Names present on both sides
    common_names = image_names.intersection(mask_names)

    # Debug: show the matching names
    print("File yang cocok:", common_names)

    if not common_names:
        print("Tidak ada file gambar dan mask yang cocok ditemukan.")
        return

    print(f"Menemukan {len(common_names)} pasangan gambar dan mask yang cocok.\n")

    # One colour per class: 0 = background, 1 = field, 2 = edge
    cmap = ListedColormap(["black", "green", "yellow"])

    for file_name in sorted(common_names):
        image_path = os.path.join(image_dir, file_name + ".tif")
        mask_path = os.path.join(mask_dir, file_name + ".npy")

        # Read the first three bands and move channels last for imshow
        with rasterio.open(image_path) as src:
            image = src.read([1, 2, 3])
            image = np.transpose(image, (1, 2, 0))

        mask = np.load(mask_path)

        plt.figure(figsize=(12, 6))

        # Left: the image on its own
        plt.subplot(1, 2, 1)
        plt.imshow(image)
        plt.title(f"Original Image: {file_name}")
        plt.axis("off")

        # Right: the image with the semi-transparent mask on top
        plt.subplot(1, 2, 2)
        plt.imshow(image)
        plt.imshow(mask, cmap=cmap, alpha=0.5, vmin=0, vmax=2)
        plt.title("Mask with 3 Classes Overlay")
        plt.axis("off")

        plt.tight_layout()
        plt.show()

# Image folders and their matching mask folders (adjust to your layout)
image_dir_1 = "data_3/train/train_images_norm/"
mask_dir_1 = "data_3/train/masks/resized/"
image_dir_2 = "data_3/test/test_images_norm/"
mask_dir_2 = "data_3/test/masks/resized/"

# Visualise the train pairs first, then the test pairs
for _image_dir, _mask_dir in ((image_dir_1, mask_dir_1), (image_dir_2, mask_dir_2)):
    visualize_all_masks_with_images(_image_dir, _mask_dir)


In [None]:
import os
import numpy as np
import rasterio

def save_images_and_masks_to_numpy(image_dir, mask_dir, output_image_dir, output_mask_dir):
    """
    Export every matching image/mask pair as NumPy files into separate folders.

    Images (.tif) and masks (.npy) are paired by file name without extension.
    Each image's first three bands are saved channels-last; each mask is
    loaded and saved again unchanged.

    Args:
        image_dir (str): folder holding the .tif images.
        mask_dir (str): folder holding the .npy masks.
        output_image_dir (str): folder that receives the image arrays.
        output_mask_dir (str): folder that receives the mask arrays.
    """
    for folder in (output_image_dir, output_mask_dir):
        os.makedirs(folder, exist_ok=True)

    # Base names (no extension) available on each side
    image_stems = {
        os.path.splitext(name)[0]
        for name in os.listdir(image_dir)
        if name.endswith(".tif")
    }
    mask_stems = {
        os.path.splitext(name)[0]
        for name in os.listdir(mask_dir)
        if name.endswith(".npy")
    }

    paired_stems = sorted(image_stems & mask_stems)

    if not paired_stems:
        print("Tidak ada file gambar dan mask yang cocok ditemukan.")
        return

    print(f"Menemukan {len(paired_stems)} pasangan gambar dan mask yang cocok.\n")

    for stem in paired_stems:
        image_output_path = os.path.join(output_image_dir, stem + ".npy")
        mask_output_path = os.path.join(output_mask_dir, stem + ".npy")

        # Read bands 1-3 and reorder to (height, width, channels)
        with rasterio.open(os.path.join(image_dir, stem + ".tif")) as source:
            pixels = np.transpose(source.read([1, 2, 3]), (1, 2, 0))
        np.save(image_output_path, pixels)

        # The mask is copied by loading it and saving it again
        labels = np.load(os.path.join(mask_dir, stem + ".npy"))
        np.save(mask_output_path, labels)

        print(f"Disimpan: {image_output_path} dan {mask_output_path}")

# Train split: source folders (adjust to your layout)
image_dir = "data_3/train/train_images_norm/"
mask_dir = "data_3/train/masks/resized/"

# Train split: destination folders for the NumPy files
output_image_dir = "data_3/train/images_npy/"
output_mask_dir = "data_3/train/masks_npy/"

# Test split: source folders (adjust to your layout)
image_dir_2 = "data_3/test/test_images_norm/"
mask_dir_2 = "data_3/test/masks/resized/"

# Test split: destination folders for the NumPy files
output_image_dir_2 = "data_3/test/images_npy/"
output_mask_dir_2 = "data_3/test/masks_npy/"

# Export the train split first, then the test split
for _export_args in (
    (image_dir, mask_dir, output_image_dir, output_mask_dir),
    (image_dir_2, mask_dir_2, output_image_dir_2, output_mask_dir_2),
):
    save_images_and_masks_to_numpy(*_export_args)


In [None]:
import numpy as np

# One mask / image pair stored as .npy, used as a sanity check
mask_path = "data_3/train/masks_npy/train_1.npy"
image_path = "data_3/train/images_npy/train_1.npy"

# Load the mask and report its shape
mask = np.load(mask_path)
mask_shape = mask.shape
print(f"Mask shape: {mask_shape}")

# Load the image; only its first two axes are compared with the mask,
# and a third axis (when present) is reported as the band count
image = np.load(image_path)
image_shape = image.shape[:2]
image_band_count = image.shape[2] if image.ndim == 3 else 1
print(f"Image shape: {image_shape}")
print(f"Image band count: {image_band_count}")

# The mask must cover exactly the same pixel grid as the image
shapes_agree = mask_shape == image_shape
print("Status: OK (Shapes match)" if shapes_agree else "Status: ERROR (Shapes do not match)")


In [None]:
import os
import numpy as np

def check_dataset_balance(mask_dir):
    """
    Report how pixels are distributed over the three mask classes across a folder.

    Every .npy mask in mask_dir is loaded and its pixels are counted per label
    (0 = background, 1 = field, 2 = edge). The totals and percentages are
    printed, followed by an imbalance warning when any class exceeds 75 %.

    If the folder holds no masks (or only empty ones) a notice is printed and
    the function returns early, instead of dividing by zero.

    Args:
        mask_dir (str): folder holding the .npy masks.

    Returns:
        None: the distribution is printed.
    """
    mask_files = [f for f in os.listdir(mask_dir) if f.endswith(".npy")]

    total_pixels = 0
    background_pixels = 0
    field_pixels = 0
    edge_pixels = 0

    for mask_file in mask_files:
        mask = np.load(os.path.join(mask_dir, mask_file))

        total_pixels += mask.size
        background_pixels += int((mask == 0).sum())
        field_pixels += int((mask == 1).sum())
        edge_pixels += int((mask == 2).sum())

    if total_pixels == 0:
        # Percentages are undefined without pixels.
        print(f"No mask pixels found in {mask_dir}; nothing to check.")
        return

    # Share of each class, in percent
    background_ratio = background_pixels / total_pixels * 100
    field_ratio = field_pixels / total_pixels * 100
    edge_ratio = edge_pixels / total_pixels * 100

    print("=== Dataset Balance Check ===")
    print(f"Total pixels: {total_pixels}")
    print(f"Background pixels: {background_pixels} ({background_ratio:.2f}%)")
    print(f"Field pixels: {field_pixels} ({field_ratio:.2f}%)")
    print(f"Edge pixels: {edge_pixels} ({edge_ratio:.2f}%)")

    # A single class above 75 % is treated as imbalance
    if max(background_ratio, field_ratio, edge_ratio) > 75:
        print("Warning: Dataset is imbalanced. Consider using weighted loss or data augmentation.")
    else:
        print("Dataset is balanced.")

# Folder holding the training masks exported as .npy
mask_dir = "data_3/train/masks_npy/"

# Print the class distribution over the whole folder
check_dataset_balance(mask_dir=mask_dir)


In [None]:
import os
import numpy as np

def check_balance_per_image(mask_dir, threshold=75):
    """
    Print the class distribution of every mask and list the imbalanced ones.

    A mask counts as an outlier when the share of any single class
    (0 = background, 1 = field, 2 = edge) is greater than threshold.

    Args:
        mask_dir (str): folder holding the .npy masks.
        threshold (float): maximum percentage a single class may reach.

    Returns:
        None: per-image percentages and a summary of outliers are printed.
    """
    mask_files = [entry for entry in os.listdir(mask_dir) if entry.endswith(".npy")]
    flagged = []

    print("=== Balance Check Per Image ===")
    print(f"Checking {len(mask_files)} mask files...\n")

    for mask_file in mask_files:
        mask = np.load(os.path.join(mask_dir, mask_file))
        pixel_total = mask.size

        # Percentage of pixels carrying each label
        background_ratio = (mask == 0).sum() / pixel_total * 100
        field_ratio = (mask == 1).sum() / pixel_total * 100
        edge_ratio = (mask == 2).sum() / pixel_total * 100

        print(f"Image: {mask_file}")
        print(f"  Background: {background_ratio:.2f}%")
        print(f"  Field: {field_ratio:.2f}%")
        print(f"  Edge: {edge_ratio:.2f}%")

        dominant_share = max(background_ratio, field_ratio, edge_ratio)
        if dominant_share > threshold:
            print("  -> Warning: This image is imbalanced (outlier).")
            flagged.append(mask_file)

        print()

    print("=== Summary ===")
    if not flagged:
        print("No outlier images detected. Dataset is balanced per image.")
        return

    print(f"Found {len(flagged)} outlier images:")
    for name in flagged:
        print(f"  - {name}")

# Folder holding the training masks exported as .npy (adjust to your layout)
mask_dir = "data_3/train/masks_npy/"

# Report the class distribution of each mask, flagging any above 75 %
check_balance_per_image(mask_dir=mask_dir, threshold=75)
