In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Echo the full path of every file found anywhere under the Kaggle input mount.
for input_file in (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
):
    print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## 1. Setup Environment

In [None]:
# Fix NumPy 2.x compatibility FIRST
!pip install ultralytics --quiet
!pip install "numpy<2" --quiet

print("‚úÖ Ultralytics installed successfully!")
print("‚úÖ NumPy downgraded to 1.x for compatibility!")
print("‚ÑπÔ∏è  Using Kaggle pre-installed packages (torch, cv2, etc.)")

In [None]:
import os
import gc
import torch
import numpy as np
import pandas as pd
import yaml
import shutil
from pathlib import Path
from ultralytics import YOLO

# Report library versions and GPU details, then derive a batch-size hint.
# `recommended_batch` is always defined after this cell, including on CPU-only
# machines, so later cells can reference it safely.
print("="*60)
print("üîß SYSTEM INFORMATION")
print("="*60)
print(f"PyTorch version: {torch.__version__}")
print(f"NumPy version: {np.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is in bytes; dividing by 1e9 gives decimal gigabytes.
    gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU Memory: {gpu_mem:.1f} GB")

    # Recommend batch size based on GPU memory
    if gpu_mem >= 15:  # P100 16GB
        recommended_batch = 16
    elif gpu_mem >= 10:
        recommended_batch = 12
    else:
        recommended_batch = 8
else:
    # No GPU: fall back to the smallest tier and make the problem visible now,
    # because the training cell requests device=0.
    recommended_batch = 8
    print("WARNING: CUDA is not available - the training cell uses device=0 and needs a GPU.")

print(f"\n‚úÖ Recommended batch size: {recommended_batch}")

In [None]:
# ============================================
# PATH CONFIGURATION - HARD-CODED FOR KAGGLE
# ============================================

DATASET_PATH = "/kaggle/input/waste-organic-inorganic-reclycable-yolov8/Final_dataset"
OUTPUT_DIR = "/kaggle/working"

# Lower-case file extensions that are counted as images.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def count_images(directory, extensions=IMAGE_EXTENSIONS):
    """Count the entries of ``directory`` whose name ends with an image extension.

    Args:
        directory: Path of the directory to scan (not recursive).
        extensions: Iterable of lower-case suffixes, each including the dot.

    Returns:
        Number of matching entries. The comparison ignores case, so ``a.JPG``
        and ``a.jpg`` are both counted.

    Raises:
        OSError: If ``directory`` cannot be listed.
    """
    suffixes = tuple(extensions)
    return sum(1 for entry in os.listdir(directory) if entry.lower().endswith(suffixes))


# Verify dataset exists
print("="*60)
print("üìÅ DATASET VERIFICATION")
print("="*60)

if os.path.exists(DATASET_PATH):
    print(f"‚úÖ Dataset found at: {DATASET_PATH}")

    # Count images in each split; report a missing split instead of skipping it silently.
    for split in ['train', 'valid', 'test']:
        img_path = os.path.join(DATASET_PATH, split, 'images')
        if os.path.isdir(img_path):
            count = count_images(img_path)
            print(f"   {split}: {count} images")
        else:
            print(f"   {split}: images directory not found ({img_path})")
else:
    print(f"‚ùå Dataset NOT found at: {DATASET_PATH}")
    print("Please check the dataset path!")

## 2. Dataset Configuration

In [None]:
# 40 waste classes - kept in sync with the dataset's label indices.
# Each group is listed in index order; the final class id is the position in
# the concatenation organic + inorganic + recyclable.
_ORGANIC_NAMES = (  # ids 0-32
    'Apple', 'Apple-core', 'Apple-peel', 'Bone', 'Bone-fish',
    'Bread', 'Bun', 'Egg', 'Egg-hard', 'Egg-scramble',
    'Egg-shell', 'Egg-steam', 'Egg-yolk', 'Fish', 'Meat',
    'Mussel', 'Mussel-shell', 'Noodle', 'Orange', 'Orange-peel',
    'Other-waste', 'Pancake', 'Pasta', 'Pear', 'Pear-core',
    'Pear-peel', 'Potato', 'Rice', 'Shrimp', 'Shrimp-shell',
    'Tofu', 'Tomato', 'Vegetable',
)
_INORGANIC_NAMES = ('plastic_bag', 'styrofoam')  # ids 33-34
_RECYCLABLE_NAMES = ('Cardboard', 'Glass', 'Metal', 'Paper', 'Plastic')  # ids 35-39

# Mapping of class id -> class name.
CLASS_NAMES = dict(enumerate(_ORGANIC_NAMES + _INORGANIC_NAMES + _RECYCLABLE_NAMES))

# Same names as a list ordered by class id, the form used for YOLO.
CLASS_NAMES_LIST = [CLASS_NAMES[class_id] for class_id in sorted(CLASS_NAMES)]

for summary_line in (
    f"\nüìä CLASS DISTRIBUTION:",
    f"   Total classes: {len(CLASS_NAMES)}",
    f"   Organic (0-32): 33 classes",
    f"   Inorganic (33-34): 2 classes",
    f"   Recyclable (35-39): 5 classes",
):
    print(summary_line)

In [None]:
# Describe the dataset (root path, split sub-folders, class count and names)
# and write it to data.yaml in the output directory. The resulting path is
# what the training cell hands to model.train(data=...).
data_yaml = dict(
    path=DATASET_PATH,
    train='train/images',
    val='valid/images',
    test='test/images',
    nc=len(CLASS_NAMES),
    names=CLASS_NAMES_LIST,
)

yaml_path = "{}/data.yaml".format(OUTPUT_DIR)
with open(yaml_path, 'w') as yaml_out:
    # Block style (one item per line) rather than inline flow style.
    yaml_out.write(yaml.dump(data_yaml, default_flow_style=False))

print(f"‚úÖ Created data.yaml at: {yaml_path}")

# Read the file back and show its first 500 characters as a sanity check.
print("\nüìÑ data.yaml content:")
with open(yaml_path, 'r') as yaml_in:
    preview = yaml_in.read()[:500]
print(preview + "...")

## 3. Utility Functions

In [None]:
def clear_memory():
    """Free Python garbage and cached GPU memory.

    Always runs ``gc.collect()``. When CUDA is available it additionally
    empties PyTorch's CUDA cache, synchronizes the device, and prints how much
    of GPU 0's total memory is not currently reserved by PyTorch.

    Returns:
        None. On a machine without CUDA nothing is printed.
    """
    gc.collect()
    if not torch.cuda.is_available():
        return

    torch.cuda.empty_cache()
    torch.cuda.synchronize()

    # "Free" = total memory of device 0 minus the memory PyTorch still reserves.
    total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    reserved_gb = torch.cuda.memory_reserved() / 1e9
    free = total_gb - reserved_gb
    print(f"‚úÖ Memory cleared! Free: {free:.2f}GB")


# Clear memory before training
clear_memory()

## 4. 🚀 Train YOLO11n - Optimized Configuration

In [None]:
# Hyperparameters that appear in the printed summary. They are defined once and
# used for BOTH the banner and model.train(), so the summary cannot drift from
# what is actually run (the banner previously announced 150 epochs and a final
# learning rate of 0.0001 while the call used 100 epochs and lr0 * lrf = 0.00001).
TRAIN_EPOCHS = 100
TRAIN_BATCH = 16
TRAIN_IMGSZ = 640
TRAIN_OPTIMIZER = 'AdamW'
TRAIN_LR0 = 0.001            # Initial learning rate
TRAIN_LRF = 0.01             # Final LR as a fraction of the initial LR

print("="*60)
print("üöÄ TRAINING YOLO11n - OPTIMIZED FOR P100")
print("="*60)
print("\nüìã Training Configuration:")
print("   Model: YOLO11n (latest architecture)")
print(f"   Epochs: {TRAIN_EPOCHS}")
print(f"   Batch size: {TRAIN_BATCH}")
print(f"   Image size: {TRAIN_IMGSZ}")
print(f"   Optimizer: {TRAIN_OPTIMIZER}")
print(f"   Learning rate: {TRAIN_LR0:g} ‚Üí {TRAIN_LR0 * TRAIN_LRF:g} (cosine)")
print("   Augmentation: Strong (mosaic, mixup, hsv, flip)")
print("="*60)

# Load pretrained YOLO11n
model = YOLO('yolo11n.pt')

# ============================================
# TRAINING CONFIGURATION - OPTIMIZED FOR P100
# ============================================
results = model.train(
    # === Data Configuration ===
    data=yaml_path,          # data.yaml written earlier in the notebook

    # === Training Duration ===
    epochs=TRAIN_EPOCHS,     # Upper bound; early stopping may end the run sooner
    patience=25,             # Early stopping patience

    # === Batch & Image Size ===
    batch=TRAIN_BATCH,       # Chosen for a P100 16GB
    imgsz=TRAIN_IMGSZ,       # Standard YOLO size

    # === Hardware ===
    device=0,                # GPU 0 (this cell requires a CUDA device)
    workers=4,               # Data loading workers

    # === Optimizer Settings ===
    optimizer=TRAIN_OPTIMIZER,
    lr0=TRAIN_LR0,           # Initial learning rate
    lrf=TRAIN_LRF,           # Final LR = lr0 * lrf = 0.00001
    momentum=0.937,          # SGD momentum/Adam beta1
    weight_decay=0.0005,     # L2 regularization

    # === Warmup Settings ===
    warmup_epochs=5,         # Longer warmup for stability
    warmup_momentum=0.8,
    warmup_bias_lr=0.1,

    # === Loss Weights ===
    box=7.5,                 # Box loss weight
    cls=0.5,                 # Classification loss weight
    dfl=1.5,                 # Distribution focal loss weight

    # === Data Augmentation (STRONG) ===
    hsv_h=0.015,             # Hue augmentation
    hsv_s=0.7,               # Saturation augmentation
    hsv_v=0.4,               # Value augmentation
    degrees=10.0,            # Rotation
    translate=0.1,           # Translation
    scale=0.5,               # Scale augmentation
    shear=2.0,               # Shear
    perspective=0.0001,      # Perspective
    flipud=0.5,              # Vertical flip
    fliplr=0.5,              # Horizontal flip
    bgr=0.0,                 # BGR augmentation
    mosaic=1.0,              # Mosaic augmentation
    mixup=0.15,              # Mixup augmentation
    copy_paste=0.1,          # Copy-paste augmentation
    # NOTE(review): Ultralytics documents `erasing` and `crop_fraction` as
    # classification-training options - confirm they affect this detection run.
    erasing=0.4,             # Random erasing
    crop_fraction=1.0,       # Crop fraction

    # === Memory & Performance ===
    cache=False,             # Don't cache to save RAM
    amp=True,                # Mixed precision training

    # === Output Settings ===
    project=f"{OUTPUT_DIR}/runs",
    name="yolo11n_waste_optimized",
    exist_ok=True,           # Reuse the run directory instead of creating a numbered copy
    pretrained=True,
    save=True,
    save_period=20,          # Save checkpoint every 20 epochs
    plots=True,
    val=True,

    # === Advanced Settings ===
    cos_lr=True,             # Cosine learning rate scheduler
    close_mosaic=10,         # Disable mosaic last 10 epochs
    # NOTE(review): recent Ultralytics releases flag `label_smoothing` as
    # deprecated - confirm it is still honoured by the installed version.
    label_smoothing=0.1,     # Label smoothing for better generalization
    nbs=64,                  # Nominal batch size for loss normalization
    # NOTE(review): `overlap_mask` / `mask_ratio` are documented as
    # segmentation-only settings - presumably no effect on detection; confirm.
    overlap_mask=True,
    mask_ratio=4,
    # NOTE(review): Ultralytics documents `dropout` for classification tasks -
    # confirm it has any effect here.
    dropout=0.1,             # Dropout for regularization
    seed=42,                 # Reproducibility
)

print("\n" + "="*60)
print("‚úÖ TRAINING COMPLETED!")
print("="*60)