# 🎯 Notebook 5: Inference & Submission

## HACKATHON SUBMISSION GENERATOR

This notebook generates **YOLO-format prediction files** for all test images and creates a **ZIP file for submission**.

**Output Format (per line):**
```
class_id x_center y_center width height confidence
```

---

## 1. Setup

In [None]:
!pip install -q ultralytics opencv-python Pillow matplotlib tqdm PyYAML

In [None]:
import os, json, zipfile, shutil
from pathlib import Path
import numpy as np
import yaml
import torch
from ultralytics import YOLO
from tqdm.notebook import tqdm
from PIL import Image

# Report the installed PyTorch build and whether a CUDA device is visible.
torch_version = torch.__version__
cuda_visible = torch.cuda.is_available()
print(f"PyTorch: {torch_version}")
print(f"CUDA: {cuda_visible}")

In [None]:
# Paths
# Resolve the project root: a Colab runtime works under /content; anywhere
# else the parent of the current working directory is used.
try:
    import google.colab  # noqa: F401  (imported only to detect the Colab runtime)
    IN_COLAB = True
    PROJECT_ROOT = Path('/content')
except ImportError:
    IN_COLAB = False
    PROJECT_ROOT = Path('..')

DATASET_ROOT = PROJECT_ROOT / 'military_object_dataset'
CONFIG_DIR = PROJECT_ROOT / 'config'
MODELS_DIR = PROJECT_ROOT / 'models'
RESULTS_DIR = PROJECT_ROOT / 'results'
# parents=True keeps this consistent with PREDICTIONS_DIR below and avoids a
# FileNotFoundError when an intermediate directory is missing.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

TEST_IMAGES = DATASET_ROOT / 'test' / 'images'

# Create output directories for submission
PREDICTIONS_DIR = RESULTS_DIR / 'yolo_predictions'
PREDICTIONS_DIR.mkdir(parents=True, exist_ok=True)

# Load class config (explicit encoding so the result does not depend on the
# platform's default locale).
with open(CONFIG_DIR / 'dataset.yaml', 'r', encoding='utf-8') as f:
    dataset_config = yaml.safe_load(f)

CLASS_NAMES = dataset_config['names']
# Fall back to the number of names when the YAML carries no explicit 'nc' key.
NUM_CLASSES = dataset_config.get('nc', len(CLASS_NAMES))

print(f"Classes: {NUM_CLASSES}")
print(f"Test images dir: {TEST_IMAGES}")

## 2. Load Model

In [None]:
# Find best model
# Preference order: an exported models/best_model.pt, then the most recently
# written runs/detect/<run>/weights/best.pt, then a stock checkpoint.
best_model_path = MODELS_DIR / 'best_model.pt'

if not best_model_path.exists():
    runs_dir = PROJECT_ROOT / 'runs' / 'detect'
    if runs_dir.exists():
        candidates = [
            weights
            for weights in (exp_dir / 'weights' / 'best.pt' for exp_dir in runs_dir.iterdir())
            if weights.exists()
        ]
        if candidates:
            # Choose by modification time rather than by name: a reverse name
            # sort ranks 'train9' above 'train10', so it stops finding the
            # newest run once there are ten or more.
            best_model_path = max(candidates, key=lambda p: p.stat().st_mtime)

if best_model_path.exists():
    model = YOLO(str(best_model_path))
    print(f"‚úÖ Loaded: {best_model_path}")
else:
    print("‚ö†Ô∏è No trained model! Run notebook 03 first.")
    # Fallback
    # NOTE(review): this is a stock checkpoint, not one trained on this
    # dataset, so its class ids presumably do not match dataset.yaml — confirm
    # before submitting predictions produced with it.
    model = YOLO('yolov8s.pt')

## 3. Generate YOLO-Format Predictions

**Format per line:** `class_id x_center y_center width height confidence`

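Box coordinates (`x_center`, `y_center`, `width`, `height`) are normalized to [0, 1] relative to the image dimensions; `class_id` is an integer class index and `confidence` is the detection score in [0, 1].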

In [None]:
def generate_yolo_predictions(
    model,
    test_images_dir: Path,
    output_dir: Path,
    conf_threshold: float = 0.25,
    iou_threshold: float = 0.45,
    imgsz: int = 640,
):
    """
    Generate one YOLO-format .txt prediction file per test image.

    Each line written is ``class_id x_center y_center width height confidence``
    with the four box values normalized to the image size.

    Args:
        model: Object exposing ``predict(...)`` as called below (a loaded
            ultralytics ``YOLO`` model in this notebook).
        test_images_dir: Directory scanned (non-recursively) for .jpg, .jpeg,
            .png and .bmp files; the extension match is case-insensitive.
        output_dir: Directory that receives the .txt files. It is created if
            missing, and any existing ``*.txt`` files in it are deleted first.
        conf_threshold: Value passed to ``predict`` as ``conf``.
        iou_threshold: Value passed to ``predict`` as ``iou``.
        imgsz: Value passed to ``predict`` as ``imgsz``.

    Returns:
        The number of test images processed (one file is written per image,
        empty when nothing was detected).

    Note:
        The output name is the image stem plus ``.txt``, so two images that
        differ only by extension share one output file; the one processed
        later overwrites the earlier one.
    """
    # Collect test images. Matching on the lower-cased suffix also picks up
    # files such as 'IMG_001.JPG'; sorting makes the processing order stable.
    image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp'}
    test_images = sorted(
        p for p in test_images_dir.glob('*')
        if p.is_file() and p.suffix.lower() in image_suffixes
    )

    print(f"üì∏ Found {len(test_images)} test images")

    # Make sure the output directory exists, then clear stale predictions so
    # files from a previous run cannot leak into the submission.
    output_dir.mkdir(parents=True, exist_ok=True)
    for stale in output_dir.glob('*.txt'):
        stale.unlink()

    # Device
    device = 0 if torch.cuda.is_available() else 'cpu'

    total_detections = 0
    empty_count = 0

    for img_path in tqdm(test_images, desc="Generating predictions"):
        # Run inference
        results = model.predict(
            str(img_path),
            conf=conf_threshold,
            iou=iou_threshold,
            imgsz=imgsz,
            device=device,
            verbose=False
        )
        boxes = results[0].boxes

        if len(boxes) > 0:
            # xywhn is (x_center, y_center, width, height) normalized by the
            # image shape stored on the result, so the values stay consistent
            # with the pixel boxes. A separate decoder may disagree: PIL's
            # Image.size, for instance, ignores EXIF rotation.
            xywhn = boxes.xywhn.cpu().numpy()
            classes = boxes.cls.cpu().numpy().astype(int)
            confs = boxes.conf.cpu().numpy()
            lines = [
                f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f} {conf:.6f}\n"
                for (x_center, y_center, width, height), cls_id, conf
                in zip(xywhn, classes, confs)
            ]
        else:
            # An empty file is still written for images with no detections.
            lines = []
            empty_count += 1

        # Output file: same name as image but .txt extension
        txt_path = output_dir / (img_path.stem + '.txt')
        with open(txt_path, 'w') as f:
            f.writelines(lines)
        total_detections += len(lines)

    print(f"\n‚úÖ Generated {len(test_images)} prediction files")
    print(f"   Total detections: {total_detections}")
    print(f"   Empty predictions: {empty_count}")
    print(f"   Output: {output_dir}")

    return len(test_images)

In [None]:
# Run inference over every test image and write the per-image .txt files.
print("üöÄ Generating YOLO-format predictions...\n")

num_predictions = generate_yolo_predictions(
    model, TEST_IMAGES, PREDICTIONS_DIR, conf_threshold=0.25
)

## 4. Create Submission ZIP

In [None]:
def create_submission_zip(predictions_dir: Path, output_path: Path):
    """
    Create a ZIP file containing all prediction .txt files.

    Args:
        predictions_dir: Directory whose top-level ``*.txt`` files are archived.
        output_path: Destination of the ZIP file. Missing parent directories
            are created; an existing file at this path is overwritten.

    Returns:
        ``output_path``, unchanged.

    Note:
        If ``predictions_dir`` contains no ``.txt`` files, an empty archive is
        still written.
    """
    # Sorted so the archive's member order is the same on every filesystem.
    txt_files = sorted(predictions_dir.glob('*.txt'))

    print(f"üì¶ Creating submission ZIP with {len(txt_files)} files...")

    # ZipFile does not create missing directories for its target path.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for txt_file in txt_files:
            # Add file with just the filename (no directory structure)
            zf.write(txt_file, arcname=txt_file.name)

    zip_size = output_path.stat().st_size / (1024 * 1024)  # MB
    print(f"\n‚úÖ Submission ZIP created!")
    print(f"   Path: {output_path}")
    print(f"   Size: {zip_size:.2f} MB")
    print(f"   Files: {len(txt_files)}")

    return output_path

In [None]:
# Bundle the prediction files into the archive that gets submitted.
SUBMISSION_ZIP = RESULTS_DIR / 'predictions.zip'

create_submission_zip(PREDICTIONS_DIR, SUBMISSION_ZIP)

## 5. Verify Submission

In [None]:
# Spot-check: show up to five lines from each of the first three prediction files.
sample_txts = list(PREDICTIONS_DIR.glob('*.txt'))[:3]

print("üìã Sample prediction files:")
print("=" * 60)
for txt_path in sample_txts:
    print(f"\n{txt_path.name}:")
    with open(txt_path, 'r') as f:
        stripped = f.read().strip()

    # A file with nothing but whitespace means the image had no detections.
    if not stripped:
        print("  (no detections)")
        continue

    detections = stripped.split('\n')
    for detection in detections[:5]:
        print(f"  {detection}")
    if len(detections) > 5:
        print(f"  ... ({len(detections)} total detections)")

In [None]:
# List the first ten archive members to confirm the ZIP layout.
print("\nüì¶ ZIP Contents (first 10 files):")
print("=" * 60)
with zipfile.ZipFile(SUBMISSION_ZIP, 'r') as zf:
    archived = zf.namelist()

for entry in archived[:10]:
    print(f"  {entry}")
if len(archived) > 10:
    print(f"  ... ({len(archived)} total files)")

In [None]:
# Verify all test images have corresponding .txt files
# Use the same extension set as the prediction step (.bmp included) and match
# it case-insensitively, so files such as 'IMG_001.JPG' are checked as well.
image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp'}
test_image_names = {
    p.stem
    for p in TEST_IMAGES.glob('*')
    if p.is_file() and p.suffix.lower() in image_suffixes
}

prediction_names = {p.stem for p in PREDICTIONS_DIR.glob('*.txt')}

missing = test_image_names - prediction_names

if missing:
    print(f"‚ö†Ô∏è WARNING: Missing predictions for {len(missing)} images!")
    # Sorted so the same five names are shown on every run.
    for name in sorted(missing)[:5]:
        print(f"   - {name}")
else:
    print(f"‚úÖ All {len(test_image_names)} test images have predictions!")

## 6. Summary

In [None]:
# Final report: where the submission artifacts are and how many files they cover.
rule = "=" * 70
summary_lines = [
    rule,
    "üéØ SUBMISSION READY",
    rule,
    "\nüì¶ SUBMISSION FILES:",
    f"   ZIP: {SUBMISSION_ZIP}",
    f"   Predictions: {PREDICTIONS_DIR}",
    "\nüìã STATISTICS:",
    f"   Test images: {len(test_image_names)}",
    f"   Prediction files: {len(prediction_names)}",
    "\n‚úÖ Download 'predictions.zip' and submit!",
    rule,
]
print("\n".join(summary_lines))

In [None]:
# Colab only: trigger a browser download of the submission archive.
if IN_COLAB:
    from google.colab import files as colab_files

    print("üì• Downloading submission ZIP...")
    colab_files.download(str(SUBMISSION_ZIP))