In [58]:
!pip install ultralytics
!pip install opencv-python-headless
!pip install albumentations
!pip install labelImg



In [59]:
import torch
from ultralytics import YOLO
import os
import glob
from pathlib import Path

In [60]:
# Report whether a CUDA device is usable before anything is trained.
cuda_available = torch.cuda.is_available()
print("CUDA:", cuda_available)
if cuda_available:
    # Only query the device when CUDA is present: get_device_name(0) raises otherwise.
    print("GPU:", torch.cuda.get_device_name(0))
    # Release cached allocator blocks left over from earlier runs in this kernel.
    torch.cuda.empty_cache()
else:
    print("GPU: none detected (training will run on CPU)")

CUDA: True
GPU: NVIDIA GeForce RTX 3050 6GB Laptop GPU


In [61]:

dataset_path = "/home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse"

# 1. DELETE EVERY CACHE FILE
# Walk the whole dataset tree and remove each file whose name ends in '.cache',
# printing the path of every file that was removed.
print("\n1. üóëÔ∏è MENGHAPUS CACHE...")
for current_dir, _subdirs, filenames in os.walk(dataset_path):
    cache_paths = (
        os.path.join(current_dir, name)
        for name in filenames
        if name.endswith('.cache')
    )
    for cache_file in cache_paths:
        os.remove(cache_file)
        print(f"   Deleted: {cache_file}")

# 2. CHECK THE FOLDER STRUCTURE
# For each expected split folder, print whether it exists and, when it does,
# how many entries it holds plus the first entry as a sample.
print("\n2. üìÅ CEK STRUKTUR FOLDER...")
# Expands to train/images, train/labels, valid/images, valid/labels (in that order).
required_folders = [
    f"{dataset_path}/{split}/{kind}"
    for split in ("train", "valid")
    for kind in ("images", "labels")
]
status_marks = {True: "‚úÖ", False: "‚ùå"}

for folder in required_folders:
    found = os.path.exists(folder)
    print(f"   {status_marks[found]} {folder}")
    if not found:
        continue

    entries = os.listdir(folder)
    print(f"      File count: {len(entries)}")
    if len(entries) > 0:
        print(f"      Sample: {entries[0]}")

# 3. CHECK DATA.YAML
# Print the contents of the dataset's data.yaml, or report that it is missing.
print("\n3. üìÑ CEK DATA.YAML...")
yaml_path = f"{dataset_path}/data.yaml"
if not os.path.exists(yaml_path):
    print("   ‚ùå File tidak ditemukan!")
else:
    with open(yaml_path, 'r') as f:
        content = f.read()
    print("   ‚úÖ File exists")
    print(f"   Content:\n{content}")

# 4. INSPECT THE LABEL FILE FORMAT IN DETAIL
# Prints, for up to three training label files, the first two rows and how each
# token parses (class id + four coordinates expected per row).
print("\n4. üîç CEK FORMAT LABEL DETAIL...")
label_dir = f"{dataset_path}/train/labels"


def _report_class_id(token):
    """Print how the class-id token of a label row parses.

    Reports an int, a float that is truncated to int, or an invalid token.
    """
    try:
        class_int = int(token)
    except ValueError:
        class_int = None
    if class_int is not None:
        print(f"        Class ID: {class_int} (int)")
        return

    try:
        class_float = float(token)
        # int() raises ValueError for NaN and OverflowError for +/-inf.
        class_int = int(class_float)
    except (ValueError, OverflowError):
        print(f"        ‚ùå Class ID invalid: '{token}'")
        return
    print(f"        Class ID: {class_float} -> {class_int} (float->int)")


def _report_coordinates(tokens):
    """Print the coordinate tokens of a label row as floats.

    Tokens that are not numbers are reported and left out of the printed list;
    values outside [0, 1] are reported but still listed.
    """
    coords = []
    for position, token in enumerate(tokens, start=1):
        try:
            value = float(token)
        except ValueError:
            print(f"        ‚ùå Coord {position} invalid: '{token}'")
            continue
        coords.append(value)
        if not (0 <= value <= 1):
            print(f"        ‚ö†Ô∏è Coord {position} out of range: {value}")
    print(f"        Coordinates: {coords}")


if os.path.isdir(label_dir):
    # Sorted so the same three sample files are inspected on every run.
    label_files = sorted(f for f in os.listdir(label_dir) if f.endswith('.txt'))[:3]
else:
    # A missing folder is reported instead of aborting the cell with FileNotFoundError.
    label_files = []
    print(f"   ‚ùå Folder label tidak ditemukan: {label_dir}")

for label_file in label_files:
    filepath = os.path.join(label_dir, label_file)
    print(f"\n   üìÑ {label_file}:")

    with open(filepath, 'r') as f:
        content = f.read().strip()

    if not content:
        print("      ‚ö†Ô∏è FILE KOSONG!")
        continue

    lines = content.split('\n')
    print(f"      Total lines: {len(lines)}")

    # Only the first two rows of each file are inspected.
    for line_no, line in enumerate(lines[:2], start=1):
        line = line.strip()
        if not line:
            continue

        print(f"      Line {line_no}: '{line}'")
        parts = line.split()
        print(f"        Parts: {len(parts)} -> {parts}")

        if len(parts) != 5:
            print(f"        ‚ùå Expected 5 parts, got {len(parts)}")
            continue

        _report_class_id(parts[0])
        _report_coordinates(parts[1:])

print("\n" + "="*80)


1. üóëÔ∏è MENGHAPUS CACHE...

2. üìÅ CEK STRUKTUR FOLDER...
   ‚úÖ /home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/train/images
      File count: 1761
      Sample: aug_1_dish_36_IMG_6490_jpeg.rf.f01172709b66dfe1edfb825fdcd9e47a.jpg
   ‚úÖ /home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/train/labels
      File count: 1761
      Sample: aug_1_dish_125_IMG_9311_jpeg.rf.26c07f4c2f2ba838f77644ab4ccb8b53.txt
   ‚úÖ /home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/valid/images
      File count: 503
      Sample: dish_237_IMG_3334_jpeg.rf.914b26919aedba6ce36e2a76b1cc6dd7.jpg
   ‚úÖ /home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/valid/labels
      File count: 503
      Sample: aug_0_dish_153_IMG_0326_jpeg.rf.2f697608a16ca484f17cf8b742a77d2f.txt

3. üìÑ CEK DATA.YAML...
   ‚úÖ File exists
   Content:
path: /home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse
train: train/images
val:

In [62]:
# CELL 2 - FIX SIMPLE ISSUES
print("5. üîß MEMPERBAIKI MASALAH UMUM...")

# A. Check that every training image has a label file and vice versa.
# Files are paired by their name without the extension.
train_img_dir = f"{dataset_path}/train/images"
train_lbl_dir = f"{dataset_path}/train/labels"

image_suffixes = ('.jpg', '.jpeg', '.png')
img_files = set()
for entry in os.listdir(train_img_dir):
    if entry.endswith(image_suffixes):
        img_files.add(os.path.splitext(entry)[0])

lbl_files = set()
for entry in os.listdir(train_lbl_dir):
    if entry.endswith('.txt'):
        lbl_files.add(os.path.splitext(entry)[0])

print(f"   Total image files: {len(img_files)}")
print(f"   Total label files: {len(lbl_files)}")

# Stems present on one side only.
missing_labels = img_files.difference(lbl_files)
missing_images = lbl_files.difference(img_files)

# Report at most five examples per kind of mismatch.
for orphans, description in (
    (missing_labels, "images tanpa label"),
    (missing_images, "labels tanpa image"),
):
    if orphans:
        print(f"   ‚ö†Ô∏è  {len(orphans)} {description}:")
        for stem in list(orphans)[:5]:
            print(f"      - {stem}")

# B. Quick fix for class ids that were written as floats (e.g. "3.0" -> "3").
# Rows whose class id cannot be converted losslessly are left untouched and
# counted, instead of being silently relabelled.
print(f"\n   üîÑ Quick fix class_id float...")
fixed_count = 0
skipped_rows = 0


def _normalize_label_row(row):
    """Return `row` with its class id rewritten as an integer.

    Rows that do not have exactly five whitespace-separated fields are returned
    unchanged. Returns None when the row has five fields but the class id is not
    a number or is not integral (e.g. "abc", "1.7", "nan"), so the caller can
    keep the original text rather than guess a class.
    """
    parts = row.split()
    if len(parts) != 5:
        return row
    try:
        class_value = float(parts[0])
    except ValueError:
        return None
    if not class_value.is_integer():
        # Also rejects NaN and +/-inf, for which is_integer() is False.
        return None
    return " ".join([str(int(class_value)), *parts[1:]])


for lbl_file in os.listdir(train_lbl_dir):
    if not lbl_file.endswith('.txt'):
        continue
    filepath = os.path.join(train_lbl_dir, lbl_file)

    with open(filepath, 'r') as f:
        # Blank lines are dropped, as before.
        rows = [raw.strip() for raw in f if raw.strip()]

    new_lines = []
    changed = False
    for row in rows:
        fixed = _normalize_label_row(row)
        if fixed is None:
            skipped_rows += 1
            fixed = row
        changed = changed or (fixed != row)
        new_lines.append(fixed)

    # Only rewrite files in which at least one row actually changed.
    if changed:
        with open(filepath, 'w') as f:
            f.write("\n".join(new_lines) + "\n")
        fixed_count += 1

print(f"   ‚úÖ Fixed {fixed_count} label files")
if skipped_rows:
    print(f"   ‚ö†Ô∏è  {skipped_rows} baris dengan class_id invalid dibiarkan apa adanya")

print("\n" + "="*80)

5. üîß MEMPERBAIKI MASALAH UMUM...
   Total image files: 1761
   Total label files: 1761

   üîÑ Quick fix class_id float...
   ‚úÖ Fixed 0 label files



In [63]:
# CELL 3 - TRY ULTIMATE FIX
# Runs a tiny 2-epoch training as a smoke test of the dataset. If it fails, prints
# the error and runs a few extra diagnostics (open one image, check_det_dataset).
# `YOLO` and `torch` come from the import cell at the top of the notebook.
print("6. üöÄ COBA TRAINING DENGAN FIX ULTIMATE...")

# DELETE THE LABEL CACHES AGAIN (both splits, matching the full-training cell)
for cache_file in (
    f"{dataset_path}/train/labels.cache",
    f"{dataset_path}/valid/labels.cache",
):
    if os.path.exists(cache_file):
        os.remove(cache_file)
        print(f"   üóëÔ∏è  Cache dihapus lagi: {cache_file}")

# TRY WITH VERY SIMPLE SETTINGS
print("\n   ‚öôÔ∏è  Setting training:")
print("      - Model: yolov8n.pt (kecil dulu)")
print("      - Epochs: 2")
print("      - Batch: 2")
print("      - Imgsz: 320")

try:
    model = YOLO('yolov8n.pt')

    results = model.train(
        data=f"{dataset_path}/data.yaml",
        epochs=2,
        imgsz=320,
        batch=2,
        device=0 if torch.cuda.is_available() else 'cpu',
        workers=1,  # Minimal workers for debugging
        verbose=True,
        project="runs/debug",
        name="ultimate_test",
        exist_ok=True,
        amp=False,  # Disable AMP
        lr0=0.01,   # Default learning rate
        patience=0,  # No early stopping
        save=False,  # Do not save the model yet
        cache=False, # Do not use the cache
    )

    print("\nüéâüéâüéâ BERHASIL! üéâüéâüéâ")
    print("Dataset kamu sudah FIX dan siap untuk training full!")

except Exception as e:
    print(f"\n‚ùå MASIH ERROR: {type(e).__name__}")
    print(f"   Message: {str(e)}")

    # EXTREME DEBUGGING
    print("\nüîß EXTREME DEBUGGING...")

    # Try to open one image. Everything that can fail here (listing the folder,
    # importing cv2, decoding) stays inside the try so a diagnostic failure
    # cannot mask the training error reported above.
    train_images_dir = os.path.join(dataset_path, "train", "images")
    try:
        image_names = os.listdir(train_images_dir)
        if not image_names:
            print(f"   ‚ùå Gambar tidak bisa dibuka!")
        else:
            import cv2
            sample_img = os.path.join(train_images_dir, image_names[0])
            print(f"\n   Coba buka gambar: {sample_img}")
            img = cv2.imread(sample_img)
            if img is not None:
                print(f"   ‚úÖ Gambar bisa dibuka: Shape {img.shape}")
            else:
                print(f"   ‚ùå Gambar tidak bisa dibuka!")
    except Exception as img_error:
        print(f"   ‚ùå Error buka gambar: {img_error}")

    # Try the dataset check manually
    print(f"\n   Coba check_det_dataset manual...")
    try:
        from ultralytics.data.utils import check_det_dataset
        data_info = check_det_dataset(f"{dataset_path}/data.yaml")
        print(f"   ‚úÖ check_det_dataset berhasil")
    except Exception as check_error:
        print(f"   ‚ùå check_det_dataset error: {check_error}")

    # RADICAL SOLUTION
    print(f"\nüí° SOLUSI RADICAL:")
    print(f"   1. Copy semua gambar ke format .jpg")
    print(f"   2. Gunakan yolov5 format")
    print(f"   3. Buat dataset baru dari scratch")

print("\n" + "="*80)

6. üöÄ COBA TRAINING DENGAN FIX ULTIMATE...

   ‚öôÔ∏è  Setting training:
      - Model: yolov8n.pt (kecil dulu)
      - Epochs: 2
      - Batch: 2
      - Imgsz: 320
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 6.2MB 324.0KB/s 19.7s.7s<0.1ss.1s
New https://pypi.org/project/ultralytics/8.3.240 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics 8.3.236 üöÄ Python-3.12.3 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3050 6GB Laptop GPU, 5804MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=False, augment=False, auto_augment=randaugment, batch=2, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, 

In [64]:
# CELL 4 - IF THE TEST SUCCEEDED, RUN THE FULL TRAINING
# Trains yolov8l.pt for 100 epochs when the smoke-test run directory exists.
print("7. üöÄ JIKA TEST BERHASIL, LANJUT TRAINING FULL...")

# Check whether the test succeeded.
# NOTE(review): this only tests that the run directory exists. The smoke test uses
# exist_ok=True, so a directory left by an earlier run also passes this check;
# presumably it can exist after a failed run as well - confirm before relying on it.
test_dir = "runs/debug/ultimate_test"
if os.path.exists(test_dir):
    print(f"\n‚úÖ Test training berhasil ditemukan di: {test_dir}")

    # Delete the label caches before the full training
    cache_files = [
        f"{dataset_path}/train/labels.cache",
        f"{dataset_path}/valid/labels.cache"
    ]

    for cf in cache_files:
        if os.path.exists(cf):
            os.remove(cf)
            print(f"üóëÔ∏è  Hapus cache: {cf}")

    # Full training with the large model
    print(f"\nüöÄ Memulai training FULL dengan yolov8l.pt")

    try:
        model = YOLO('yolov8l.pt')

        model.train(
            data=f"{dataset_path}/data.yaml",
            epochs=100,
            imgsz=640,
            batch=4,  # Adjust to the available GPU memory
            # Same device selection as the smoke test, so a machine without CUDA
            # falls back to CPU instead of failing on device=0.
            device=0 if torch.cuda.is_available() else 'cpu',
            workers=4,
            verbose=True,
            project="runs/train",
            name="nutritionverse_full",
            exist_ok=True,
            amp=True,
            optimizer="AdamW",
            lr0=0.001,
            lrf=0.01,
            momentum=0.937,
            weight_decay=0.0005,
            warmup_epochs=3,
            warmup_momentum=0.8,
            box=7.5,
            cls=0.5,
            dfl=1.5,
            close_mosaic=10,
            resume=False,
            save=True,
            save_period=10,
            pretrained=True,
        )

        print(f"\nüéâüéâüéâ TRAINING FULL SELESAI! üéâüéâüéâ")

    except Exception as full_error:
        print(f"\n‚ùå Error training full: {full_error}")

else:
    print(f"\n‚ö†Ô∏è  Test belum berhasil, selesaikan debugging dulu")

print("\n" + "="*80)

7. üöÄ JIKA TEST BERHASIL, LANJUT TRAINING FULL...

‚úÖ Test training berhasil ditemukan di: runs/debug/ultimate_test
üóëÔ∏è  Hapus cache: /home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/train/labels.cache
üóëÔ∏è  Hapus cache: /home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/valid/labels.cache

üöÄ Memulai training FULL dengan yolov8l.pt
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l.pt to 'yolov8l.pt': 62% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 52.0/83.7MB 6.9MB/s 7.5s<4.6ss4s


KeyboardInterrupt: 