In [1]:
# ============================================
# YOLOv8 Model Optimization for Jetson Nano
# Run this in Google Colab
# ============================================

# STEP 1: Mount Google Drive
# One print call with sep="\n" emits the same three banner lines as three
# consecutive print() calls would.
print("=" * 60, "STEP 1: Mounting Google Drive", "=" * 60, sep="\n")

# Per the file header this script is meant to run in Google Colab, which is
# where the google.colab package comes from.
from google.colab import drive

# Make Drive contents reachable under /content/drive (MODEL_PATH and
# OUTPUT_DIR further down both live below this mount point).
drive.mount('/content/drive')

# STEP 2: Install Required Libraries
print("\n" + "="*60)
print("STEP 2: Installing Dependencies")
print("="*60)

!pip install ultralytics -q
!pip install onnx onnxsim -q

# STEP 3: Check GPU Availability
# Blank line + banner, written as a single print call.
print("\n" + "=" * 60, "STEP 3: Checking GPU", "=" * 60, sep="\n")

import torch

print(f"CUDA Available: {torch.cuda.is_available()}")
if not torch.cuda.is_available():
    # No CUDA device: warn only, the script keeps going.
    print("‚ö†Ô∏è WARNING: No GPU detected. Optimization will be slower.")
else:
    # Report name and total memory (bytes / 1e9 -> GB) of device index 0.
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# STEP 4: Locate Your Model
print("\n" + "="*60)
print("STEP 4: Model Location Setup")
print("="*60)

import os

# UPDATE THIS PATH to where your yolov8m.pt is located in Google Drive
MODEL_PATH = '/content/drive/MyDrive/yolov8m.pt'  # <-- CHANGE THIS PATH

# Fail fast when the weights file is missing.
# isfile() is used instead of exists() so that a *directory* at MODEL_PATH is
# rejected here with the helpful hints below, rather than being reported as
# "found" and only failing later when the model is loaded.
if not os.path.isfile(MODEL_PATH):
    print(f"‚ùå Model NOT found at: {MODEL_PATH}")
    print("\nPlease update MODEL_PATH variable above to match your file location.")
    print("Example paths:")
    print("  - '/content/drive/MyDrive/yolov8m.pt'")
    print("  - '/content/drive/MyDrive/models/yolov8m.pt'")
    raise FileNotFoundError(f"Model not found: {MODEL_PATH}")

# Weights are present: report the location and the size in MiB.
print(f"‚úÖ Model found: {MODEL_PATH}")
file_size = os.path.getsize(MODEL_PATH) / (1024*1024)
print(f"   File size: {file_size:.2f} MB")

# STEP 5: Create Output Directory
# Blank line + banner, written as a single print call.
print("\n" + "=" * 60, "STEP 5: Creating Output Directory", "=" * 60, sep="\n")

# Destination folder on Drive; the export sections copy their results here.
OUTPUT_DIR = '/content/drive/MyDrive/optimized_models'

# exist_ok=True: re-running the cell with the folder already present is fine.
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"‚úÖ Output directory: {OUTPUT_DIR}")

# STEP 6: Load Model
# Blank line + banner, written as a single print call.
print("\n" + "=" * 60, "STEP 6: Loading YOLOv8 Model", "=" * 60, sep="\n")

from ultralytics import YOLO

# Build the YOLO wrapper from the weights file located in STEP 4.
model = YOLO(MODEL_PATH)
print("‚úÖ Model loaded successfully")

# Show the class name of the object held in model.model.
print(f"   Model type: {type(model.model).__name__}")

# STEP 7: Export to Different Formats
print("\n" + "="*60)
print("STEP 7: Exporting Optimized Models")
print("="*60)

# Imported up front, outside any try block, so the name is always bound.
# Every export section calls shutil.copy; when this import lived inside the
# ONNX try (after model.export), a failing ONNX export left shutil undefined
# and the later copies died with a NameError that was then reported as an
# "export failed" message.
import shutil

# ---------------------------------------------
# Export 1: ONNX (Universal format)
# ---------------------------------------------
print("\n[1/4] Exporting to ONNX...")
print("   What: Universal neural network format")
print("   Optimization: Graph simplification")
print("   Quantization: None (FP32)")
print("   Best for: Portability, intermediate format")

try:
    # The value returned by export() is used below as the path of the
    # written file.
    onnx_path = model.export(
        format='onnx',
        simplify=True,  # Simplifies the computation graph
        dynamic=False,   # Fixed input size for better optimization
        imgsz=640        # Input size
    )
    print(f"   ‚úÖ ONNX export successful: {onnx_path}")

    # Copy to Google Drive under the same file name.
    onnx_filename = os.path.basename(onnx_path)
    drive_onnx = os.path.join(OUTPUT_DIR, onnx_filename)
    shutil.copy(onnx_path, drive_onnx)
    print(f"   ‚úÖ Saved to Drive: {drive_onnx}")
except Exception as e:
    # Best effort: report the problem and carry on with the other formats.
    print(f"   ‚ùå ONNX export failed: {e}")

# ---------------------------------------------
# Export 2: TensorRT FP16 (RECOMMENDED)
# ---------------------------------------------
# Bind shutil locally so the Drive copy below works no matter whether (or
# where) another section imported it; without it, a missing import surfaces
# as a misleading "export failed: name 'shutil' is not defined".
import shutil

print("\n[2/4] Exporting to TensorRT FP16...")
print("   What: NVIDIA optimized inference engine")
print("   Optimization: Layer fusion, kernel auto-tuning, memory optimization")
print("   Quantization: FP16 (16-bit floating point)")
print("   Speed: ~2-3x faster than PyTorch")
print("   Accuracy: ~99% of original (minimal loss)")
print("   Best for: Jetson Nano (RECOMMENDED)")

try:
    trt_fp16_path = model.export(
        format='engine',
        half=True,       # Enable FP16 quantization
        device=0,        # Use GPU
        workspace=4,     # Max workspace size in GB
        imgsz=640
    )
    print(f"   ‚úÖ TensorRT FP16 export successful: {trt_fp16_path}")

    # Copy to Google Drive, tagging the file name with the precision so it
    # does not collide with the other .engine variants saved to OUTPUT_DIR.
    trt_filename = os.path.basename(trt_fp16_path)
    drive_trt = os.path.join(OUTPUT_DIR, trt_filename.replace('.engine', '_fp16.engine'))
    shutil.copy(trt_fp16_path, drive_trt)
    print(f"   ‚úÖ Saved to Drive: {drive_trt}")
except Exception as e:
    # Best effort: report the problem and carry on with the other formats.
    print(f"   ‚ùå TensorRT FP16 export failed: {e}")
    print("   Note: TensorRT exports are GPU-specific. Re-export on Jetson for best compatibility.")

# ---------------------------------------------
# Export 3: TensorRT INT8 (ADVANCED)
# ---------------------------------------------
print("\n[3/4] Exporting to TensorRT INT8...")
print("   What: NVIDIA optimized with integer quantization")
print("   Optimization: FP16 optimizations + INT8 quantization")
print("   Quantization: INT8 (8-bit integers)")
print("   Speed: ~3-4x faster than PyTorch")
print("   Accuracy: ~97-98% of original (1-3% loss)")
print("   Requires: Calibration dataset")
print("   Best for: Maximum performance (if accuracy acceptable)")

try:
    # Download calibration dataset (COCO128 - small subset)
    print("   Downloading calibration dataset...")
    !wget -q https://ultralytics.com/assets/coco128.zip
    !unzip -q coco128.zip

    # Create data.yaml for calibration
    with open('coco128.yaml', 'w') as f:
        f.write("""
path: coco128
train: images/train2017
val: images/train2017

nc: 80
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
        'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
        'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
        'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
        'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
        'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
        'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
        'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
""")

    print("   Starting INT8 calibration (this may take 5-10 minutes)...")
    trt_int8_path = model.export(
        format='engine',
        int8=True,           # Enable INT8 quantization
        data='coco128.yaml', # Calibration dataset
        device=0,
        workspace=4,
        imgsz=640,
        batch=1
    )
    print(f"   ‚úÖ TensorRT INT8 export successful: {trt_int8_path}")

    # Copy to Google Drive
    trt_int8_filename = os.path.basename(trt_int8_path)
    drive_trt_int8 = os.path.join(OUTPUT_DIR, trt_int8_filename.replace('.engine', '_int8.engine'))
    shutil.copy(trt_int8_path, drive_trt_int8)
    print(f"   ‚úÖ Saved to Drive: {drive_trt_int8}")
except Exception as e:
    print(f"   ‚ùå TensorRT INT8 export failed: {e}")
    print("   Tip: INT8 export requires GPU and may fail on some Colab instances")

# ---------------------------------------------
# Export 4: Smaller Input Sizes (320x320)
# ---------------------------------------------
# Bind shutil locally so the Drive copy below works no matter whether (or
# where) another section imported it.
import shutil

print("\n[4/4] Exporting TensorRT FP16 with 320x320 input...")
print("   What: Same as #2 but with smaller input resolution")
print("   Optimization: Same FP16 optimizations")
print("   Quantization: FP16")
print("   Speed: ~2x faster than 640x640 (total ~4-6x vs PyTorch)")
print("   Trade-off: Slightly less accurate for small objects")
print("   Best for: Real-time applications on Jetson")

try:
    # Reload model to reset export settings
    model = YOLO(MODEL_PATH)

    trt_320_path = model.export(
        format='engine',
        half=True,
        device=0,
        workspace=4,
        imgsz=320  # Smaller input size
    )
    print(f"   ‚úÖ TensorRT FP16 320x320 export successful: {trt_320_path}")

    # Copy to Google Drive, tagging the file name with precision and input
    # size so it does not collide with the other .engine variants.
    trt_320_filename = os.path.basename(trt_320_path)
    drive_trt_320 = os.path.join(OUTPUT_DIR, trt_320_filename.replace('.engine', '_fp16_320.engine'))
    shutil.copy(trt_320_path, drive_trt_320)
    print(f"   ‚úÖ Saved to Drive: {drive_trt_320}")
except Exception as e:
    # Best effort: report the problem and continue to the summary.
    print(f"   ‚ùå TensorRT 320x320 export failed: {e}")

# STEP 8: Summary
print("\n" + "="*60)
print("STEP 8: Export Summary")
print("="*60)

print("\nüì¶ Exported Models:")
print("-" * 60)

# Filter on the base name of the configured weights instead of a hard-coded
# 'yolov8' prefix. MODEL_PATH is meant to be edited by the user, and the files
# copied to OUTPUT_DIR are named after the exporter's output (expected to keep
# the weights' stem, e.g. yolov8m.pt -> yolov8m.onnx, yolov8m_fp16.engine).
# With the fixed prefix, a model such as best.pt was always summarised as
# "No models were exported successfully".
model_stem = os.path.splitext(os.path.basename(MODEL_PATH))[0]

exported_files = []
# sorted(): os.listdir() returns entries in arbitrary order.
for file in sorted(os.listdir(OUTPUT_DIR)):
    if file.startswith(model_stem):
        file_path = os.path.join(OUTPUT_DIR, file)
        file_size = os.path.getsize(file_path) / (1024*1024)  # bytes -> MiB
        exported_files.append((file, file_size))
        print(f"‚úÖ {file:40s} ({file_size:6.2f} MB)")

if not exported_files:
    print("‚ùå No models were exported successfully")
else:
    print("-" * 60)
    print(f"Total models exported: {len(exported_files)}")

# STEP 9: What to Do Next
# Blank line + banner, written as a single print call.
print("\n" + "=" * 60, "STEP 9: Next Steps for Jetson Nano", "=" * 60, sep="\n")

# First half of the static guidance: what each export does, followed by the
# heading of the transfer instructions.
print("""
üìã OPTIMIZATIONS APPLIED:

1. ONNX Export:
   - Graph simplification
   - No quantization (FP32)
   - Portable format

2. TensorRT FP16 (640x640): ‚≠ê RECOMMENDED
   - Quantization: FP32 ‚Üí FP16 (16-bit)
   - Layer fusion and kernel optimization
   - ~2-3x faster, <1% accuracy loss
   - File: *_fp16.engine

3. TensorRT INT8 (640x640): (If successful)
   - Quantization: FP32 ‚Üí INT8 (8-bit)
   - Calibrated on COCO128 dataset
   - ~3-4x faster, 1-3% accuracy loss
   - File: *_int8.engine

4. TensorRT FP16 (320x320): ‚≠ê BEST FOR REALTIME
   - Same as #2 but smaller input
   - ~4-6x faster than original
   - Best balance for Jetson Nano
   - File: *_fp16_320.engine

üì• TRANSFER TO JETSON:

1. Download files from Google Drive:
""")
# The Drive folder is interpolated between the two static text blocks.
print(f"   {OUTPUT_DIR}")

# Second half: transfer commands, compatibility caveats, recommended workflow
# and rough performance expectations.
print("""
2. Transfer via SCP (from your PC):
   scp optimized_models/*.onnx jetson@<jetson-ip>:~/models/
   scp optimized_models/*.engine jetson@<jetson-ip>:~/models/

3. Or use USB drive / direct download from Drive

‚ö†Ô∏è IMPORTANT NOTES:

- .engine files are GPU-specific!
- Engine files from Colab may NOT work on Jetson
- SOLUTION: Transfer .onnx file and convert on Jetson:

  On Jetson Nano:
  from ultralytics import YOLO
  model = YOLO('yolov8m.onnx')
  model.export(format='engine', half=True)

üöÄ RECOMMENDED WORKFLOW:

1. Transfer yolov8m.onnx to Jetson (most portable)
2. On Jetson, convert ONNX ‚Üí TensorRT FP16:
   python3 -c "from ultralytics import YOLO; YOLO('yolov8m.onnx').export(format='engine', half=True, imgsz=320)"
3. Use the generated .engine file for inference

üí° Expected Performance on Jetson Nano:
   - Original .pt (FP32, 640x640): 1-2 FPS
   - TensorRT FP16 (640x640): 3-5 FPS
   - TensorRT FP16 (320x320): 8-12 FPS ‚≠ê BEST
   - TensorRT INT8 (320x320): 10-15 FPS (if calibrated well)
""")

# Closing banner.
print("=" * 60, "‚úÖ OPTIMIZATION COMPLETE!", "=" * 60, sep="\n")

# Display file locations
print("\nüìÇ All optimized models saved to:")
print(f"   {OUTPUT_DIR}")
print("\nYou can download them from your Google Drive now!")

STEP 1: Mounting Google Drive
Mounted at /content/drive

STEP 2: Installing Dependencies
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.1/1.1 MB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m21.0/21.0 MB[0m [31m109.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m18.2/18.2 MB[0m [31m99.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for onnxsim (setup.py) ... [?25l[?25hdone

STEP 3: Checking GPU
CUDA Available: True
GPU Name: Tesla T4
GPU Memory: 15.83 GB

STEP 4: Model Location Setup
✅ Model found: /content/drive/MyDrive/yolov8m.pt
   File size: 49.70 MB

ST