In [2]:
import torch
# GPU smoke test: report what PyTorch can see, then attempt a real
# CPU -> GPU tensor copy. All user-facing messages are in Indonesian.
print("--- Tes Ketersediaan GPU PyTorch ---")
# 1. Ask PyTorch whether CUDA is usable.
is_available = torch.cuda.is_available()
print(f"Apakah CUDA tersedia? -> {is_available}")

if not is_available:
    print("!! Peringatan: PyTorch tidak dapat mendeteksi GPU Anda. Pastikan driver NVIDIA dan CUDA Toolkit sudah benar.")
else:
    # 2. Number of GPUs PyTorch detected.
    gpu_count = torch.cuda.device_count()
    print(f"Jumlah GPU yang terdeteksi: {gpu_count}")

    # 3. Name of the GPU at device index 0.
    gpu_name = torch.cuda.get_device_name(0)
    print(f"Nama GPU: {gpu_name}")

    # 4. Create a tensor and move it to the GPU.
    print("\n--- Tes Penggunaan Memori GPU ---")
    try:
        # Build a small tensor on the CPU first.
        cpu_tensor = torch.randn(5, 3)
        print(f"Tensor dibuat di device: {cpu_tensor.device}")

        # Move it to the GPU; this is the step that exercises the device.
        gpu_tensor = cpu_tensor.to('cuda')
        print(f"Tensor berhasil dipindahkan ke device: {gpu_tensor.device}")
        print("Isi tensor di GPU:")
        print(gpu_tensor)

        # The status markers below were mojibake; restored to the intended emoji.
        print("\n✅ Tes GPU berhasil! PyTorch bisa menggunakan GPU Anda.")

    except Exception as e:
        # Report the failure instead of aborting the notebook run.
        print(f"Terjadi error saat mencoba menggunakan GPU: {e}")
        print("\n❌ Tes GPU gagal.")

--- Tes Ketersediaan GPU PyTorch ---
Apakah CUDA tersedia? -> True
Jumlah GPU yang terdeteksi: 1
Nama GPU: NVIDIA GeForce RTX 5060

--- Tes Penggunaan Memori GPU ---
Tensor dibuat di device: cpu
Tensor berhasil dipindahkan ke device: cuda:0
Isi tensor di GPU:
tensor([[ 0.7970, -0.0774,  1.1627],
        [-0.1303, -1.3215,  0.0492],
        [ 0.6236, -0.2506, -0.4992],
        [-0.5724,  0.1147, -1.4978],
        [-0.2497, -0.7462, -0.7004]], device='cuda:0')

✅ Tes GPU berhasil! PyTorch bisa menggunakan GPU Anda.


In [1]:
import torch

# Report the visible GPUs and choose the device used by the rest of the notebook.
print("Number of GPU: ", torch.cuda.device_count())

# get_device_name() raises when no CUDA device is usable, so only query it
# when CUDA is available; otherwise the CPU fallback below is never reached.
if torch.cuda.is_available():
    print("GPU Name: ", torch.cuda.get_device_name())
else:
    print("GPU Name: ", "n/a (CUDA not available)")


# Fall back to the CPU when CUDA is not available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Number of GPU:  1
GPU Name:  NVIDIA GeForce RTX 5060
Using device: cuda


In [None]:
import json
import os
import shutil
from sklearn.model_selection import train_test_split
import numpy as np

# -----------------------------------------------------------------------------
# PHASE 1: DEFINE THE HARMONISATION PLAN
# -----------------------------------------------------------------------------

# Final master class list. Ids come from the position in this tuple, so they
# start at 0 and stay contiguous.
_MASTER_CLASS_NAMES = (
    "Angel/Manfish",
    "Betta",
    "Goldfish",
    "Koi",
    "Tetra",
    "Botia",
    "Molly",
    "Guppy",
    "Rainbow",
    "Komet",
    "Lemon Cichlid",
    "Jewel Cichlid",
    "Polar Blue Parrot",
    "Tiger Barb",
)
MASTER_CATEGORIES = [
    {"id": class_id, "name": class_name, "supercategory": "fish"}
    for class_id, class_name in enumerate(_MASTER_CLASS_NAMES)
]

# Conversion tables: category id in the source dataset -> NEW master id.
map_d1 = {1: 4, 2: 5}                            # -> Tetra, Botia
map_d1.update(dict.fromkeys(range(3, 17), 3))    # source ids 3-16  -> 3 (Koi)
map_d1.update(dict.fromkeys(range(17, 22), 2))   # source ids 17-21 -> 2 (Goldfish)
map_d1.update({22: 9, 23: 10, 24: 0, 25: 8})     # -> Komet, Lemon Cichlid, Angel/Manfish, Rainbow

map_d2 = {
    1: 0,    # Angel/Manfish
    2: 1,    # Betta
    3: 1,    # Betta
    4: 3,    # Koi
    5: 7,    # Guppy
    6: 11,   # Jewel Cichlid
    7: 6,    # Molly
    8: 12,   # Polar Blue Parrot
    9: 2,    # Goldfish
    10: 13,  # Tiger Barb
}

# Root folder of every source dataset, paired with its conversion table.
datasets_info = [
    dict(base_path='Dataset/Ornamental Fish', id_map=map_d1),
    dict(base_path='Dataset/Ornamental Freshwater Fish', id_map=map_d2),
]

# Output folder for the combined dataset.
output_path = './final_combined_dataset/'

print("Konfigurasi selesai. Siap untuk Fase 2.")

In [None]:
# -----------------------------------------------------------------------------
# FASE 2: FUNGSI DAN EKSEKUSI PENGGABUNGAN
# -----------------------------------------------------------------------------

def merge_coco_datasets_all_splits(datasets, master_categories, output_base_path):
    """Merge every available split of several COCO datasets into one COCO dict.

    For each dataset, the 'train', 'valid' and 'test' folders are checked for
    an ``_annotations.coco.json`` file. Images get consecutive new ids, their
    files are copied into ``<output_base_path>/images_all``, and annotations
    are renumbered and relabelled through the dataset's id map.

    Args:
        datasets: Iterable of dicts with the keys ``"base_path"`` (dataset
            root folder) and ``"id_map"`` (source category id -> master id).
        master_categories: Category list stored as-is under ``"categories"``.
        output_base_path: Folder under which the ``images_all`` staging
            directory is created.

    Returns:
        dict: COCO-style dict with the keys ``images``, ``annotations``,
        ``categories``, ``info`` and ``licenses``.

    Notes:
        * Annotations whose category id is not in the id map are dropped.
        * Annotations pointing at an image id that is absent from their own
          split are skipped with a warning.
        * When a file name was already staged by an earlier split/dataset, the
          new image is stored under a suffixed name (``<stem>_<new id><ext>``)
          so that neither file overwrites the other.
        * The loaded image/annotation dicts are copied, never edited in place.
    """
    merged_data = {
        "images": [],
        "annotations": [],
        "categories": master_categories,
        "info": {"description": "Merged Dataset from multiple sources."},
        "licenses": []
    }

    # Staging folder that receives the images of all splits.
    temp_images_all_path = os.path.join(output_base_path, 'images_all')
    os.makedirs(temp_images_all_path, exist_ok=True)
    print(f"Created temporary directory: {temp_images_all_path}")

    # File names already claimed in the staging folder during this run.
    used_file_names = set()

    for dataset_info in datasets:
        base_path = dataset_info["base_path"]
        id_map = dataset_info["id_map"]
        print(f"\nProcessing dataset at base path: {base_path}")

        for split in ['train', 'valid', 'test']:
            image_dir = os.path.join(base_path, split)
            json_path = os.path.join(image_dir, '_annotations.coco.json')

            if not os.path.exists(json_path):
                print(f"  - Split '{split}' not found in '{base_path}', skipping.")
                continue

            print(f"  + Merging split: '{split}'")
            with open(json_path, 'r') as f:
                data = json.load(f)

            # Old image id (this split only) -> new merged image id.
            current_image_id_map = {}
            for img in data['images']:
                source_name = img['file_name']
                new_img_id = len(merged_data['images'])
                current_image_id_map[img['id']] = new_img_id

                # Pick a staging name that nobody else has claimed yet.
                target_name = source_name
                while target_name in used_file_names:
                    stem, ext = os.path.splitext(target_name)
                    target_name = f"{stem}_{new_img_id}{ext}"
                if target_name != source_name:
                    print(f"    [WARNING] Duplicate file name '{source_name}', stored as '{target_name}'")
                used_file_names.add(target_name)

                # Copy the record: editing ids in place would let a later
                # lookup by old id hit an image that was already renumbered.
                merged_data['images'].append({**img, 'id': new_img_id, 'file_name': target_name})

                # Images sit either directly in the split folder or in an
                # 'images' subfolder.
                src_img_path = os.path.join(image_dir, source_name)
                if not os.path.exists(src_img_path):
                    src_img_path = os.path.join(image_dir, 'images', source_name)

                if os.path.exists(src_img_path):
                    shutil.copy2(src_img_path, os.path.join(temp_images_all_path, target_name))
                else:
                    print(f"    [WARNING] Image not found, skipping copy: {src_img_path}")

            # Renumber and relabel the annotations of this split.
            for ann in data['annotations']:
                if ann['category_id'] not in id_map:
                    continue
                if ann['image_id'] not in current_image_id_map:
                    print(f"    [WARNING] Annotation {ann.get('id')} references unknown image id {ann['image_id']}, skipping.")
                    continue

                merged_data['annotations'].append({
                    **ann,
                    'id': len(merged_data['annotations']),
                    'image_id': current_image_id_map[ann['image_id']],
                    'category_id': id_map[ann['category_id']],
                })

    return merged_data

# Run the merge over every configured dataset.
print("--- STARTING MERGE PROCESS ---")
merged_coco = merge_coco_datasets_all_splits(
    datasets=datasets_info,
    master_categories=MASTER_CATEGORIES,
    output_base_path=output_path,
)

# Persist the merged COCO file; it is only a temporary artefact.
merged_json_path = os.path.join(output_path, '_merged_annotations.coco.json')
with open(merged_json_path, 'w') as f:
    f.write(json.dumps(merged_coco, indent=2))

# One print call, one line per message.
print(
    "\n--- MERGE COMPLETE ---",
    f"Total unique images merged: {len(merged_coco['images'])}",
    f"Total harmonized annotations merged: {len(merged_coco['annotations'])}",
    f"Merged annotations file saved to: {merged_json_path}",
    sep="\n",
)

In [None]:
# -----------------------------------------------------------------------------
# FASE 3: MEMBUAT SPLIT TRAIN/VALID/TEST BARU
# -----------------------------------------------------------------------------

def create_split_files(image_list, source_coco, split_name, output_dir):
    """Write one split (image files plus COCO json) under ``output_dir``.

    Args:
        image_list: Image dicts (each with ``id`` and ``file_name``) that
            belong to this split.
        source_coco: Merged COCO dict; its ``annotations`` are filtered by
            image id, and ``categories`` / ``info`` / ``licenses`` are reused.
        split_name: Name of the folder created below ``output_dir``.
        output_dir: Base folder that already holds the ``images_all`` staging
            folder.

    Returns:
        None. The results are the copied image files and
        ``<output_dir>/<split_name>/_annotations.coco.json``.
    """
    split_dir = os.path.join(output_dir, split_name)
    os.makedirs(split_dir, exist_ok=True)

    categories = source_coco['categories']
    wanted_ids = {entry['id'] for entry in image_list}
    staging_dir = os.path.join(output_dir, 'images_all')

    # 1. Copy the image files; a missing file is reported, not fatal.
    print(f"\nCopying {len(image_list)} images for '{split_name}' split...")
    for entry in image_list:
        staged_file = os.path.join(staging_dir, entry['file_name'])
        if not os.path.exists(staged_file):
            print(f"  [WARNING] Could not find source image to copy: {staged_file}")
            continue
        shutil.copy2(staged_file, os.path.join(split_dir, entry['file_name']))

    # 2. Assemble the COCO dict; keep only annotations of this split's images.
    split_coco = {
        "images": list(image_list),
        "annotations": [
            annotation
            for annotation in source_coco['annotations']
            if annotation['image_id'] in wanted_ids
        ],
        "categories": categories,
        "info": source_coco.get('info', {}),
        "licenses": source_coco.get('licenses', []),
    }

    # 3. Save the json next to the images.
    json_path = os.path.join(split_dir, '_annotations.coco.json')
    with open(json_path, 'w') as handle:
        json.dump(split_coco, handle, indent=2)

    print(f"Created '{split_name}' split: {len(split_coco['images'])} images, {len(split_coco['annotations'])} annotations.")
    print(f"Annotations saved to: {json_path}")

# --- Run the re-split ---
print("\n--- STARTING RE-SPLIT PROCESS ---")
all_images = merged_coco['images']

# Target ratio: 70% train, 20% validation, 10% test.
# Step 1 holds out 10% for test; step 2 takes 22% of the remaining 90% for
# validation (0.22 * 0.9 ~= 0.2).
train_val_images, test_images = train_test_split(all_images, test_size=0.1, random_state=42)
train_images, val_images = train_test_split(train_val_images, test_size=0.22, random_state=42)

print(f"\nNew dataset sizes: Train={len(train_images)}, Valid={len(val_images)}, Test={len(test_images)}")

# Write each split in the order train, valid, test.
for split_name, split_images in (
    ('train', train_images),
    ('valid', val_images),
    ('test', test_images),
):
    create_split_files(split_images, merged_coco, split_name, output_path)

# Remove the temporary staging folder and the temporary merged json.
staging_folder = os.path.join(output_path, 'images_all')
merged_json_file = os.path.join(output_path, '_merged_annotations.coco.json')
shutil.rmtree(staging_folder)
os.remove(merged_json_file)

print(
    "\n--- PROCESS FINISHED! ---",
    f"Your final, re-splitted dataset is ready in: '{output_path}'",
    sep="\n",
)

In [None]:
import os
import json
import shutil
import numpy as np
import random
from sklearn.model_selection import train_test_split

# 1. Seed the stdlib and NumPy global random generators.
random.seed(42)
np.random.seed(42)

# -----------------------------------------------------------------------------
# MASTER CATEGORIES & DATASET MAPPING
# -----------------------------------------------------------------------------
# Class names in master-id order; the id of each class is its position here.
_FISH_CLASS_NAMES = (
    "Angel/Manfish", "Betta", "Goldfish", "Koi", "Tetra", "Botia", "Molly",
    "Guppy", "Rainbow", "Komet", "Lemon Cichlid", "Jewel Cichlid",
    "Polar Blue Parrot", "Tiger Barb",
)
MASTER_CATEGORIES = [
    {"id": master_id, "name": fish_name, "supercategory": "fish"}
    for master_id, fish_name in enumerate(_FISH_CLASS_NAMES)
]

# Source datasets: root folder plus the table "source category id -> master id".
datasets_info = [
    {
        "base_path": 'Dataset/Ornamental Fish',
        "id_map": {
            1: 4,                                 # Tetra
            2: 5,                                 # Botia
            **dict.fromkeys(range(3, 17), 3),     # source ids 3-16  -> Koi
            **dict.fromkeys(range(17, 22), 2),    # source ids 17-21 -> Goldfish
            22: 9,                                # Komet
            23: 10,                               # Lemon Cichlid
            24: 0,                                # Angel/Manfish
            25: 8,                                # Rainbow
        },
    },
    {
        "base_path": 'Dataset/Ornamental Freshwater Fish',
        "id_map": {
            1: 0,    # Angel/Manfish
            2: 1,    # Betta
            3: 1,    # Betta
            4: 3,    # Koi
            5: 7,    # Guppy
            6: 11,   # Jewel Cichlid
            7: 6,    # Molly
            8: 12,   # Polar Blue Parrot
            9: 2,    # Goldfish
            10: 13,  # Tiger Barb
        },
    },
]

# Folder that receives the combined dataset.
output_path = './final_combined_dataset/'

# -----------------------------------------------------------------------------
# FASE 1: MERGE
# -----------------------------------------------------------------------------
def merge_coco_datasets(datasets, master_categories, output_base_path):
    """Merge all splits of several COCO datasets into a single COCO dict.

    For each dataset, the 'train', 'valid' and 'test' folders are checked for
    an ``_annotations.coco.json`` file. Images get consecutive new ids and a
    ``ds<index>_`` file-name prefix, their files are copied into
    ``<output_base_path>/images_all``, and annotations are renumbered and
    relabelled through the dataset's id map.

    Args:
        datasets: Sequence of dicts with the keys ``"base_path"`` (dataset
            root folder) and ``"id_map"`` (source category id -> master id).
        master_categories: Category list stored as-is under ``"categories"``.
        output_base_path: Folder under which the ``images_all`` staging
            directory is created.

    Returns:
        dict: COCO-style dict with the keys ``images``, ``annotations``,
        ``categories``, ``info`` and ``licenses``.

    Notes:
        * Annotations whose category id is not in the id map are dropped.
        * Annotations pointing at an image id that is absent from their own
          split are skipped with a warning instead of raising ``KeyError``.
        * The dataset prefix alone does not separate two splits of the same
          dataset; when a prefixed name is already staged, the new image is
          stored as ``<stem>_<new id><ext>`` so neither file is overwritten.
    """
    merged_data = {
        "images": [],
        "annotations": [],
        "categories": master_categories,
        "info": {"description": "Merged ornamental fish dataset"},
        "licenses": []
    }
    # Running counters that hand out the new, globally unique ids.
    image_id_offset = 0
    annotation_id_offset = 0

    temp_images_path = os.path.join(output_base_path, 'images_all')
    os.makedirs(temp_images_path, exist_ok=True)

    # File names already claimed in the staging folder during this run.
    staged_names = set()

    for dataset_idx, dataset_info in enumerate(datasets):
        base_path = dataset_info["base_path"]
        id_map = dataset_info["id_map"]
        prefix = f"ds{dataset_idx}_"

        print(f"\nMerging from {base_path}")
        for split in ['train', 'valid', 'test']:
            ann_path = os.path.join(base_path, split, '_annotations.coco.json')
            if not os.path.exists(ann_path):
                print(f"  - {split} annotations not found, skipping.")
                continue

            with open(ann_path, 'r') as f:
                data = json.load(f)

            # Remap images: old id (this split only) -> new merged id.
            img_id_map = {}
            for img in data['images']:
                new_id = image_id_offset
                prefixed_name = f"{prefix}{img['file_name']}"

                # Pick a staging name that nobody else has claimed yet.
                new_file_name = prefixed_name
                while new_file_name in staged_names:
                    stem, ext = os.path.splitext(new_file_name)
                    new_file_name = f"{stem}_{new_id}{ext}"
                if new_file_name != prefixed_name:
                    print(f"    [WARNING] Duplicate file name '{prefixed_name}', stored as '{new_file_name}'")
                staged_names.add(new_file_name)

                img_id_map[img['id']] = new_id
                merged_data['images'].append({
                    **img,
                    'id': new_id,
                    'file_name': new_file_name
                })

                # Images sit either directly in the split folder or in an
                # 'images' subfolder.
                src_img_path = os.path.join(base_path, split, img['file_name'])
                if not os.path.exists(src_img_path):
                    src_img_path = os.path.join(base_path, split, 'images', img['file_name'])
                if os.path.exists(src_img_path):
                    shutil.copy2(src_img_path, os.path.join(temp_images_path, new_file_name))
                else:
                    print(f"    [WARNING] Missing image: {src_img_path}")

                image_id_offset += 1

            # Remap annotations.
            for ann in data['annotations']:
                if ann['category_id'] not in id_map:
                    continue
                if ann['image_id'] not in img_id_map:
                    print(f"    [WARNING] Annotation {ann.get('id')} references unknown image id {ann['image_id']}, skipping.")
                    continue
                new_ann = ann.copy()
                new_ann['id'] = annotation_id_offset
                new_ann['image_id'] = img_id_map[ann['image_id']]
                new_ann['category_id'] = id_map[ann['category_id']]
                merged_data['annotations'].append(new_ann)
                annotation_id_offset += 1

    return merged_data

# Merge every configured dataset into one COCO dict.
merged_coco = merge_coco_datasets(datasets_info, MASTER_CATEGORIES, output_path)

# Save the merged result; this file is temporary.
merged_json_path = os.path.join(output_path, '_merged_annotations.coco.json')
with open(merged_json_path, 'w') as f:
    json.dump(merged_coco, f, indent=2)
# The status marker was mojibake; restored to the intended check-mark emoji.
print(f"\n✅ Merge complete: {len(merged_coco['images'])} images, {len(merged_coco['annotations'])} annotations.")


# -----------------------------------------------------------------------------
# FASE 2: SPLIT
# -----------------------------------------------------------------------------
def create_split(split_name, images, annotations, categories, output_base_path):
    """Write one split (image files plus COCO json) under ``output_base_path``.

    Args:
        split_name: Name of the folder created below ``output_base_path``.
        images: Image dicts (each with ``id`` and ``file_name``) that belong
            to this split.
        annotations: All merged annotations; only those whose ``image_id``
            belongs to ``images`` are written.
        categories: Category list stored as-is in the split's json.
        output_base_path: Base folder that already holds the ``images_all``
            staging folder.

    Returns:
        None. The results are the copied image files and
        ``<output_base_path>/<split_name>/_annotations.coco.json``.
    """
    # Materialise once so the input can be iterated several times and measured.
    images = list(images)

    img_dir = os.path.join(output_base_path, split_name)
    os.makedirs(img_dir, exist_ok=True)

    # Copy images; a missing staged file is reported instead of silently
    # skipped, because its entry is still written to the json below.
    for img in images:
        src = os.path.join(output_base_path, 'images_all', img['file_name'])
        dst = os.path.join(img_dir, img['file_name'])
        if os.path.exists(src):
            shutil.copy2(src, dst)
        else:
            print(f"  [WARNING] Could not find source image to copy: {src}")

    # Filter annotations down to the images of this split.
    img_ids = {img['id'] for img in images}
    filtered_anns = [ann for ann in annotations if ann['image_id'] in img_ids]

    # Save the COCO json next to the images.
    split_coco = {
        "images": images,
        "annotations": filtered_anns,
        "categories": categories,
        "info": {},
        "licenses": []
    }
    with open(os.path.join(img_dir, '_annotations.coco.json'), 'w') as f:
        json.dump(split_coco, f, indent=2)
    print(f"{split_name}: {len(images)} images, {len(filtered_anns)} annotations.")

# Split the merged images: 10% held out for test, then 22% of the remaining
# 90% for validation (0.22 * 0.9 ~= 0.2), leaving roughly 70% for training.
train_val, test = train_test_split(merged_coco['images'], test_size=0.1, random_state=42)
train, val = train_test_split(train_val, test_size=0.22, random_state=42)

print("\n--- Creating splits ---")
create_split('train', train, merged_coco['annotations'], MASTER_CATEGORIES, output_path)
create_split('valid', val, merged_coco['annotations'], MASTER_CATEGORIES, output_path)
create_split('test', test, merged_coco['annotations'], MASTER_CATEGORIES, output_path)

# Clean up the temporary staging folder and merged json.
shutil.rmtree(os.path.join(output_path, 'images_all'))
os.remove(merged_json_path)
# The status marker was mojibake; restored to the intended emoji.
print("\n🎉 Done! Dataset siap di folder:", output_path)
