In [3]:
import os
import shutil
import numpy as np
from torchvision.datasets import ImageFolder

# Path dataset utama (52 kelas)
data_root = r"C:\Users\HP\novelty\dataset"
output_root = r"C:\Users\HP\novelty\split_datasets-1" 

# Daftar variasi pembagian (labeled, unlabeled)
splits = [
    (12, 40),
    (22, 30),
    (32, 20),
    (42, 10)
]

# Load seluruh dataset
full_dataset = ImageFolder(root=data_root)
class_names = np.array(full_dataset.classes)  # Daftar 52 kelas

for i, (n_labeled, n_unlabeled) in enumerate(splits):
    # Set seed unik untuk setiap split (42 + indeks)
    np.random.seed(1 + i)
    
    # Acak urutan kelas dengan seed unik
    shuffled_class_idx = np.random.permutation(len(class_names))
    
    # Pisahkan indeks kelas
    labeled_class_idx = shuffled_class_idx[:n_labeled]
    unlabeled_class_idx = shuffled_class_idx[n_labeled:]
    
    # Buat folder output
    split_name = f"{n_labeled}labeled_{n_unlabeled}unlabeled"
    os.makedirs(os.path.join(output_root, split_name, "labeled"), exist_ok=True)
    os.makedirs(os.path.join(output_root, split_name, "unlabeled"), exist_ok=True)
    
    # Salin file
    for class_idx in labeled_class_idx:
        class_name = class_names[class_idx]
        src_dir = os.path.join(data_root, class_name)
        dst_dir = os.path.join(output_root, split_name, "labeled", class_name)
        shutil.copytree(src_dir, dst_dir)
    
    for class_idx in unlabeled_class_idx:
        class_name = class_names[class_idx]
        src_dir = os.path.join(data_root, class_name)
        dst_dir = os.path.join(output_root, split_name, "unlabeled", class_name)
        shutil.copytree(src_dir, dst_dir)
    
    print(f"Split {split_name} selesai. Seed: {42+i}, Labeled: {n_labeled} kelas, Unlabeled: {n_unlabeled} kelas.")

Split 12labeled_40unlabeled selesai. Seed: 42, Labeled: 12 kelas, Unlabeled: 40 kelas.
Split 22labeled_30unlabeled selesai. Seed: 43, Labeled: 22 kelas, Unlabeled: 30 kelas.
Split 32labeled_20unlabeled selesai. Seed: 44, Labeled: 32 kelas, Unlabeled: 20 kelas.
Split 42labeled_10unlabeled selesai. Seed: 45, Labeled: 42 kelas, Unlabeled: 10 kelas.


In [4]:
import os
import shutil
import numpy as np
from torchvision.datasets import ImageFolder

# Path dataset utama (52 kelas)
data_root = r"C:\Users\HP\novelty\dataset"
output_root = r"C:\Users\HP\novelty\split_datasets" 

# Daftar variasi pembagian (labeled, unlabeled)
splits = [
    (12, 40),
    (22, 30),
    (32, 20),
    (42, 10)
]

# Load seluruh dataset
full_dataset = ImageFolder(root=data_root)
class_names = np.array(full_dataset.classes)  # Daftar 52 kelas

# Jumlah percobaan per variasi
num_trials = 5

for trial in range(num_trials):
    print(f"\n=== Trial {trial+1} ===")
    for i, (n_labeled, n_unlabeled) in enumerate(splits):
        # Gunakan seed berbeda per trial dan split
        seed = 1000 * trial + i  # contoh: 0, 1001, 2002, ...
        np.random.seed(seed)

        # Acak urutan kelas
        shuffled_class_idx = np.random.permutation(len(class_names))
        labeled_class_idx = shuffled_class_idx[:n_labeled]
        unlabeled_class_idx = shuffled_class_idx[n_labeled:]

        # Buat folder output untuk trial dan split ini
        split_name = f"{n_labeled}labeled_{n_unlabeled}unlabeled"
        split_output_dir = os.path.join(output_root, f"trial_{trial}", split_name)
        labeled_output = os.path.join(split_output_dir, "labeled")
        unlabeled_output = os.path.join(split_output_dir, "unlabeled")

        os.makedirs(labeled_output, exist_ok=True)
        os.makedirs(unlabeled_output, exist_ok=True)

        # Salin file berlabel
        for class_idx in labeled_class_idx:
            class_name = class_names[class_idx]
            src_dir = os.path.join(data_root, class_name)
            dst_dir = os.path.join(labeled_output, class_name)
            shutil.copytree(src_dir, dst_dir)

        # Salin file tak berlabel
        for class_idx in unlabeled_class_idx:
            class_name = class_names[class_idx]
            src_dir = os.path.join(data_root, class_name)
            dst_dir = os.path.join(unlabeled_output, class_name)
            shutil.copytree(src_dir, dst_dir)

        print(f"[Trial {trial}] Split {split_name} selesai. Seed: {seed}, Labeled: {n_labeled}, Unlabeled: {n_unlabeled}")



=== Trial 1 ===
[Trial 0] Split 12labeled_40unlabeled selesai. Seed: 0, Labeled: 12, Unlabeled: 40
[Trial 0] Split 22labeled_30unlabeled selesai. Seed: 1, Labeled: 22, Unlabeled: 30
[Trial 0] Split 32labeled_20unlabeled selesai. Seed: 2, Labeled: 32, Unlabeled: 20
[Trial 0] Split 42labeled_10unlabeled selesai. Seed: 3, Labeled: 42, Unlabeled: 10

=== Trial 2 ===
[Trial 1] Split 12labeled_40unlabeled selesai. Seed: 1000, Labeled: 12, Unlabeled: 40
[Trial 1] Split 22labeled_30unlabeled selesai. Seed: 1001, Labeled: 22, Unlabeled: 30
[Trial 1] Split 32labeled_20unlabeled selesai. Seed: 1002, Labeled: 32, Unlabeled: 20
[Trial 1] Split 42labeled_10unlabeled selesai. Seed: 1003, Labeled: 42, Unlabeled: 10

=== Trial 3 ===
[Trial 2] Split 12labeled_40unlabeled selesai. Seed: 2000, Labeled: 12, Unlabeled: 40
[Trial 2] Split 22labeled_30unlabeled selesai. Seed: 2001, Labeled: 22, Unlabeled: 30
[Trial 2] Split 32labeled_20unlabeled selesai. Seed: 2002, Labeled: 32, Unlabeled: 20
[Trial 2] Split