# 🚀 Notebook 3: Training (Colab Ready)

## Military Object Detection with YOLOv8

**⚡ OPTIMIZED**: Single model, 11 epochs, ~15-30 min on GPU

### Steps:
1. Upload `data.zip` to Colab (or mount Drive)
2. Run all cells
3. Download trained model at the end

---

## 1. Install & Import

In [10]:
# Install packages
!pip install -q ultralytics>=8.0.0

In [11]:
import os, json, shutil, zipfile, warnings
from pathlib import Path
from datetime import datetime

import numpy as np
import torch
from ultralytics import YOLO

# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Report the PyTorch build, then pick the device handed to training:
# CUDA device index 0 when a GPU is visible, otherwise the string 'cpu'.
print(f"PyTorch: {torch.__version__}")
DEVICE = 0 if torch.cuda.is_available() else 'cpu'
if DEVICE == 'cpu':
    print("‚ö†Ô∏è No GPU! Go to Runtime > Change runtime type > GPU")
else:
    print(f"‚úÖ GPU: {torch.cuda.get_device_name(0)}")

PyTorch: 2.9.0+cu126
‚úÖ GPU: Tesla T4


## 2. Upload Data

**Option A**: Upload `data.zip` using the file browser (left sidebar)

**Option B**: Mount Google Drive if `data.zip` is there

In [12]:
# Detect environment: a successful import of google.colab is taken to mean
# the notebook is running in Colab; an ImportError means a local run.
try:
    import google.colab  # noqa: F401 - imported only as a presence check
except ImportError:
    IN_COLAB, PROJECT_ROOT = False, Path('..')
    print("üíª Running locally")
else:
    IN_COLAB, PROJECT_ROOT = True, Path('/content')
    print("üöÄ Running in Colab")

# Project layout, everything relative to PROJECT_ROOT.
DATASET_ROOT = PROJECT_ROOT / 'military_object_dataset'
CONFIG_DIR = PROJECT_ROOT / 'config'
MODELS_DIR, RUNS_DIR, RESULTS_DIR = (
    PROJECT_ROOT / folder for folder in ('models', 'runs', 'results')
)

# Only the output folders are created here; the dataset and config folders are not.
for out_dir in (MODELS_DIR, RUNS_DIR, RESULTS_DIR):
    out_dir.mkdir(exist_ok=True)

üöÄ Running in Colab


In [13]:
# === UPLOAD/EXTRACT DATA ===

DATA_ZIP = PROJECT_ROOT / 'data.zip'


def _extract_data_zip():
    """Unpack DATA_ZIP into PROJECT_ROOT and print a confirmation."""
    with zipfile.ZipFile(DATA_ZIP, 'r') as archive:
        archive.extractall(PROJECT_ROOT)
    print("‚úÖ Extracted!")


# Resolution order in Colab: data already unpacked -> data.zip next to the
# notebook -> data.zip in one of two Google Drive locations.
if not IN_COLAB:
    print("üíª Local mode - using existing data")
elif DATASET_ROOT.exists() and (CONFIG_DIR / 'dataset.yaml').exists():
    print("‚úÖ Data already extracted!")
elif DATA_ZIP.exists():
    print("üì¶ Found data.zip, extracting...")
    _extract_data_zip()
else:
    print("üìÇ Mounting Google Drive...")
    from google.colab import drive
    drive.mount('/content/drive')

    # Candidate Drive locations, checked in order; the first hit wins.
    drive_candidates = (
        Path('/content/drive/MyDrive/data.zip'),
        Path('/content/drive/MyDrive/hackathon/data.zip'),
    )
    drive_zip = next((path for path in drive_candidates if path.exists()), None)

    if drive_zip is None:
        print("‚ùå data.zip not found!")
        print("   Please upload data.zip to /content or to Google Drive root")
    else:
        print(f"üì¶ Found {drive_zip}, copying...")
        # Copy into PROJECT_ROOT first, then unpack the local copy.
        shutil.copy(drive_zip, DATA_ZIP)
        _extract_data_zip()

‚úÖ Data already extracted!


In [14]:
# Verify data
DATASET_YAML = CONFIG_DIR / 'dataset.yaml'

# Fail fast: nothing below can work without the dataset description.
if not DATASET_YAML.exists():
    raise FileNotFoundError(f"‚ùå Missing {DATASET_YAML}! Please upload data.zip")

print(f"‚úÖ Dataset YAML: {DATASET_YAML}")
print(f"‚úÖ Dataset root: {DATASET_ROOT}")

# File suffixes counted as images.
# NOTE(review): intended to mirror the image formats Ultralytics loads -
# confirm against the installed version if the dataset uses unusual formats.
IMAGE_SUFFIXES = {
    '.bmp', '.dng', '.heic', '.jpeg', '.jpg', '.mpo',
    '.pfm', '.png', '.tif', '.tiff', '.webp',
}


def count_images(split):
    """Return how many image files live in DATASET_ROOT/<split>/images.

    Only regular files whose (case-insensitive) suffix is in IMAGE_SUFFIXES
    are counted, so hidden files, caches and sub-folders do not inflate the
    total the way a bare glob('*') would. A missing folder counts as 0.
    """
    image_dir = DATASET_ROOT / split / 'images'
    if not image_dir.is_dir():
        return 0
    return sum(
        1
        for entry in image_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() in IMAGE_SUFFIXES
    )


# Count images
train_imgs = count_images('train')
val_imgs = count_images('val')
test_imgs = count_images('test')
print(f"   Train: {train_imgs} | Val: {val_imgs} | Test: {test_imgs}")

‚úÖ Dataset YAML: /content/config/dataset.yaml
‚úÖ Dataset root: /content/military_object_dataset
   Train: 10000 | Val: 2941 | Test: 1396


## 3. Train Model

In [15]:
# Fix the random seeds so repeated runs start from the same RNG state.
SEED = 42

# NumPy's global RNG first, then PyTorch's default generator.
for seed_rng in (np.random.seed, torch.manual_seed):
    seed_rng(SEED)

# Also seed every CUDA device when a GPU is present.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

In [16]:
# Training hyperparameters, read by the training cell below.
CONFIG = dict(
    model='yolov8s.pt',   # Balanced speed/accuracy
    epochs=11,            # Enough for good results
    batch=16,
    imgsz=640,
    patience=5,           # Early stopping
    optimizer='AdamW',
    lr0=0.002,
)

# Echo the settings, one "key: value" line each, in definition order.
print("‚ö° Training Config:")
print("\n".join(f"   {name}: {value}" for name, value in CONFIG.items()))

‚ö° Training Config:
   model: yolov8s.pt
   epochs: 11
   batch: 16
   imgsz: 640
   patience: 5
   optimizer: AdamW
   lr0: 0.002


In [17]:
# === TRAIN ===
divider = "=" * 60
print("\n" + divider)
print("üöÄ STARTING TRAINING")
print(divider)

model = YOLO(CONFIG['model'])

# Hyperparameters come straight from CONFIG; 'model' is deliberately left out
# because it was already consumed by the YOLO(...) constructor above.
train_kwargs = {
    key: CONFIG[key]
    for key in ('epochs', 'batch', 'imgsz', 'patience', 'optimizer', 'lr0')
}
# Fixed run settings: dataset, device, output location and reproducibility.
train_kwargs.update(
    data=str(DATASET_YAML.absolute()),
    device=DEVICE,
    project=str(RUNS_DIR / 'detect'),
    name='military_detector',
    exist_ok=True,
    pretrained=True,
    seed=SEED,
    amp=True,
    plots=True,
    val=True,
    workers=2,
)

# Time only the training call itself; the result is reported in minutes.
start_time = datetime.now()
results = model.train(**train_kwargs)
elapsed = datetime.now() - start_time

training_time = elapsed.total_seconds() / 60
print(f"\n‚úÖ Training complete in {training_time:.1f} minutes")


üöÄ STARTING TRAINING
Ultralytics 8.3.237 üöÄ Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/config/dataset.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=11, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.002, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=military_detector, nbs=64, nms=False, opset=None, optimize=False, optimizer=AdamW, overlap_mask=True,

## 4. Validate & Save Model

In [18]:
# Resolve the run folder reported by training and the best checkpoint in it.
save_dir = Path(results.save_dir)
best_model_path = save_dir.joinpath('weights', 'best.pt')

print(
    f"üìÅ Training outputs: {save_dir}",
    f"üìÅ Best model: {best_model_path}",
    sep="\n",
)

üìÅ Training outputs: /content/runs/detect/military_detector
üìÅ Best model: /content/runs/detect/military_detector/weights/best.pt


In [19]:
# Validate the best checkpoint with the same batch size and image size that
# were used for training. Both are read from CONFIG so that editing CONFIG
# cannot silently leave validation on different settings.
print("\nüîç Running validation...")
best_model = YOLO(str(best_model_path))
val_results = best_model.val(
    data=str(DATASET_YAML.absolute()),
    batch=CONFIG['batch'],
    imgsz=CONFIG['imgsz'],
    plots=True,
)

# Summary of the box metrics returned by val(), plus the training wall time.
print("\n" + "="*60)
print("üìä FINAL RESULTS")
print("="*60)
print(f"   mAP@0.5:      {val_results.box.map50:.4f}")
print(f"   mAP@0.5:0.95: {val_results.box.map:.4f}")
print(f"   Precision:    {val_results.box.mp:.4f}")
print(f"   Recall:       {val_results.box.mr:.4f}")
print(f"   Training:     {training_time:.1f} min")


üîç Running validation...
Ultralytics 8.3.237 üöÄ Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 11,130,228 parameters, 0 gradients, 28.5 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 1486.3¬±1105.5 MB/s, size: 96.7 KB)
[K[34m[1mval: [0mScanning /content/military_object_dataset/val/labels.cache... 2941 images, 273 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 2941/2941 5.9Mit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 184/184 3.7it/s 49.1s
                   all       2941       5081      0.595      0.406      0.421      0.262
    camouflage_soldier        385        510      0.681      0.627      0.664      0.332
                weapon        222        358      0.579      0.453      0.472      0.318
         military_tank        938       1787      0.739      0.831       0.81      0.518

In [20]:
# Put a copy of the best checkpoint under a stable name in MODELS_DIR.
model_dst = MODELS_DIR / 'best_model.pt'
shutil.copy(best_model_path, model_dst)
print(f"\nüíæ Model copied to: {model_dst}")

# Collect the run summary: configuration, validation box metrics (cast to
# plain floats so they serialise as JSON numbers) and the training duration.
box_metrics = val_results.box
metric_values = {
    'mAP50': box_metrics.map50,
    'mAP50-95': box_metrics.map,
    'precision': box_metrics.mp,
    'recall': box_metrics.mr,
}
results_data = {
    'model': CONFIG['model'],
    'epochs': CONFIG['epochs'],
    **{metric: float(value) for metric, value in metric_values.items()},
    'training_time_min': round(training_time, 1),
}

# Write the summary as indented JSON into RESULTS_DIR.
results_json_path = RESULTS_DIR / 'training_results.json'
results_json_path.write_text(json.dumps(results_data, indent=2))

print(f"üíæ Results saved to: {RESULTS_DIR / 'training_results.json'}")


üíæ Model copied to: /content/models/best_model.pt
üíæ Results saved to: /content/results/training_results.json


## 5. Download Model (Colab)

**IMPORTANT**: Download the trained model before the Colab session ends!

In [21]:
# Outside Colab there is nothing to download; just report where the model is.
if not IN_COLAB:
    print(f"üíª Local mode - model saved at: {model_dst}")
else:
    from google.colab import files

    # (announcement, file) pairs, downloaded in this order.
    pending_downloads = (
        ("üì• Downloading best_model.pt...", model_dst),
        ("\nüì• Downloading training_results.json...", RESULTS_DIR / 'training_results.json'),
    )
    for announcement, artifact in pending_downloads:
        print(announcement)
        files.download(str(artifact))

üì• Downloading best_model.pt...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


üì• Downloading training_results.json...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [22]:
# Closing banner and follow-up instructions, emitted one entry per line.
rule = "=" * 60
closing_lines = (
    "\n" + rule,
    "‚úÖ TRAINING COMPLETE!",
    rule,
    "\nüìã Next Steps:",
    "   1. Download best_model.pt (above)",
    "   2. Run Notebook 05 to generate predictions.zip",
    "   3. Submit predictions.zip to hackathon",
    "\nüí° TIP: Keep this Colab tab open and run Notebook 05 here!",
)
print(*closing_lines, sep="\n")


‚úÖ TRAINING COMPLETE!

üìã Next Steps:
   1. Download best_model.pt (above)
   2. Run Notebook 05 to generate predictions.zip
   3. Submit predictions.zip to hackathon

üí° TIP: Keep this Colab tab open and run Notebook 05 here!
