In [None]:
# SpaceNet Building Detection - YOLOv8 ONNX

This notebook trains a YOLOv8 model for building detection on the SpaceNet dataset and exports it to ONNX format.

## Setup

First, let's install the required packages and check the GPU.


In [None]:
# Check GPU
!nvidia-smi

# Connect to Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install required packages
!pip install ultralytics geopandas shapely onnxruntime rasterio scikit-learn opencv-python -q

import os
import shutil
from PIL import Image
import numpy as np
import rasterio
from rasterio.plot import reshape_as_image
import geopandas as gpd
from shapely.geometry import box
from sklearn.model_selection import train_test_split
import cv2
import warnings
warnings.filterwarnings('ignore')


In [None]:
## Dataset Download

Let's download the SpaceNet dataset from AWS. This process might take some time.


In [None]:
# Install AWS CLI
!pip install awscli -q

# Download SpaceNet dataset
!aws s3 cp s3://spacenet-dataset/spacenet/SN2_buildings/tarballs/SN2_buildings_train_AOI_2_Vegas.tar.gz . --no-sign-request

# Extract archive
!mkdir -p spacenet
!tar -xf SN2_buildings_train_AOI_2_Vegas.tar.gz -C spacenet


In [None]:
## Helper Functions

Functions for image processing and label conversion.


In [None]:
def normalize_image(img):
    """Stretch an image to the 0-255 range and enhance its local contrast.

    The input is min-max normalized and converted to 8-bit, then CLAHE
    (contrast limited adaptive histogram equalization) is applied to each of
    the first three channels, or to the single plane of a 2-D image.

    Args:
        img: Image array, either 2-D (H, W) or 3-D (H, W, C), of any numeric
            dtype accepted by ``cv2.normalize``.

    Returns:
        ``np.uint8`` array with the same shape as ``img``.
    """
    # dtype=cv2.CV_8U forces an 8-bit result. Without it cv2.normalize keeps the
    # input dtype, so a 16-bit tile stays uint16: later filters then saturate at
    # 65535 instead of 255 and a final astype(np.uint8) wraps around.
    img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    if img.ndim == 3:
        # Only the first three channels are equalized (as before); min() avoids
        # an IndexError on inputs with fewer than three channels.
        for channel in range(min(3, img.shape[2])):
            plane = np.ascontiguousarray(img[:, :, channel])
            img[:, :, channel] = clahe.apply(plane)
    else:
        img = clahe.apply(img)
    return img

def resize_image(img, target_size=(640, 640)):
    """Return ``img`` rescaled to ``target_size`` using area interpolation.

    ``target_size`` is handed to ``cv2.resize`` as its ``dsize`` argument, i.e.
    it is interpreted as (width, height).
    """
    resized = cv2.resize(img, dsize=target_size, interpolation=cv2.INTER_AREA)
    return resized

def enhance_edges(img):
    """Sharpen ``img`` with a 3x3 kernel (centre weight 9, all neighbours -1).

    The result keeps the depth of the input (``ddepth=-1``).
    """
    sharpen_kernel = np.full((3, 3), -1)
    sharpen_kernel[1, 1] = 9
    sharpened = cv2.filter2D(img, -1, sharpen_kernel)
    return sharpened

def _find_source_raster(geojson_path):
    """Return the GeoTIFF tile that belongs to a SpaceNet label file, or None if it cannot be located."""
    label_name = os.path.basename(geojson_path)
    prefix, suffix = "buildings_", ".geojson"
    if not (label_name.startswith(prefix) and label_name.endswith(suffix)):
        return None
    img_id = label_name[len(prefix):-len(suffix)]
    # Labels live in <AOI>/geojson/buildings/, tiles in <AOI>/RGB-PanSharpen/.
    aoi_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(geojson_path))))
    candidate = os.path.join(aoi_root, "RGB-PanSharpen", f"RGB-PanSharpen_{img_id}.tif")
    return candidate if os.path.exists(candidate) else None


def _iter_polygons(geom):
    """Yield the non-empty polygon parts of a geometry; missing and non-areal geometries yield nothing."""
    if geom is None or geom.is_empty:
        return
    if geom.geom_type == "Polygon":
        yield geom
    elif geom.geom_type == "MultiPolygon":
        for part in geom.geoms:
            if not part.is_empty:
                yield part


def geojson_to_yolo(geojson_path, image_size, out_path, raster_path=None):
    """Convert building footprints from a GeoJSON file to a YOLO label file.

    SpaceNet building labels are georeferenced (map coordinates), so they cannot
    be divided by the pixel size of the image directly. Each footprint's bounding
    box is therefore mapped through the extent of the source GeoTIFF, which also
    flips the y axis (map y grows northwards, image rows grow downwards). Because
    YOLO coordinates are normalized, the result is independent of any later
    resizing of the image.

    Args:
        geojson_path: Path to the GeoJSON file with the building polygons.
        image_size: (width, height) of the image in pixels. Only used in the
            fallback mode, where the geometries are assumed to already be in
            pixel coordinates.
        out_path: Path of the YOLO ``.txt`` label file to write.
        raster_path: Optional path to the georeferenced source tile. When
            omitted, the tile is looked up next to the labels using the SpaceNet
            directory layout; if none is found the function falls back to
            treating the geometries as pixel coordinates.
            NOTE(review): the extent mapping assumes a north-up, unrotated tile.

    Returns:
        True if the label file was written (it may be empty when the tile holds
        no usable footprint), False if the conversion failed. On failure a
        warning is printed and no file is written.
    """
    try:
        gdf = gpd.read_file(geojson_path)

        if raster_path is None:
            raster_path = _find_source_raster(geojson_path)

        if raster_path is not None:
            with rasterio.open(raster_path) as src:
                left, bottom, right, top = src.bounds
                raster_crs = src.crs
            # Bring the labels into the tile's CRS so both share one coordinate space.
            if raster_crs is not None and gdf.crs is not None and len(gdf) > 0:
                gdf = gdf.to_crs(raster_crs.to_wkt())
            x_origin, x_span = left, right - left
            # Measured from the top edge with a negative span: this flips the y axis.
            y_origin, y_span = top, bottom - top
        else:
            x_origin, x_span = 0.0, float(image_size[0])
            y_origin, y_span = 0.0, float(image_size[1])

        lines = []
        for _, row in gdf.iterrows():
            for poly in _iter_polygons(row.geometry):
                minx, miny, maxx, maxy = poly.bounds
                xs = sorted(((minx - x_origin) / x_span, (maxx - x_origin) / x_span))
                ys = sorted(((miny - y_origin) / y_span, (maxy - y_origin) / y_span))

                # Clip the box edges (not centre and size independently) so that a
                # footprint crossing the tile border keeps a consistent box.
                x0, x1 = (min(1.0, max(0.0, v)) for v in xs)
                y0, y1 = (min(1.0, max(0.0, v)) for v in ys)
                width, height = x1 - x0, y1 - y0

                # Skip boxes that lie outside the tile or vanish at the output precision.
                if width < 1e-6 or height < 1e-6:
                    continue

                lines.append(
                    f"0 {(x0 + x1) / 2:.6f} {(y0 + y1) / 2:.6f} {width:.6f} {height:.6f}\n"
                )

        # Write only after every geometry converted, so a failure never leaves a
        # partially written label file behind.
        with open(out_path, 'w') as f:
            f.writelines(lines)
        return True
    except Exception as e:
        print(f"Warning: Could not convert {os.path.basename(geojson_path)}: {e}")
        return False


In [None]:
## Image Processing

Convert TIFF images to JPG format and prepare them for YOLOv8.


In [None]:
# Input / output locations
src_dir = "./spacenet/AOI_2_Vegas_Train/RGB-PanSharpen"
dst_dir = "./spacenet/RGB-Converted"
label_dir = "./spacenet/labels"

for out_dir in (dst_dir, label_dir):
    os.makedirs(out_dir, exist_ok=True)

# Counters for the summary printed at the end
converted, skipped = 0, 0

for file in os.listdir(src_dir):
    # Only the Vegas GeoTIFF tiles are of interest
    if not (file.endswith('.tif') and "AOI_2_Vegas_img" in file):
        continue

    img_id = file.split("RGB-PanSharpen_")[-1].replace(".tif", "")
    geojson_path = os.path.join(
        "./spacenet/AOI_2_Vegas_Train/geojson/buildings",
        f"buildings_{img_id}.geojson",
    )

    # A tile without a label file is of no use for training
    if not os.path.exists(geojson_path):
        skipped += 1
        continue

    src_path = os.path.join(src_dir, file)
    try:
        with rasterio.open(src_path) as src:
            # Bands 1-3 -> (H, W, 3) image, then normalize -> sharpen -> resize
            img = reshape_as_image(src.read([1, 2, 3]))
            img = resize_image(enhance_edges(normalize_image(img)))

            out_path = os.path.join(dst_dir, f"{img_id}.jpg")
            Image.fromarray(img.astype(np.uint8)).save(out_path, quality=95)
            converted += 1

            if converted % 100 == 0:
                print(f"Progress: {converted} images processed...")
    except Exception as e:
        # Best effort: report the tile, count it as skipped and carry on
        print(f"Warning: Could not convert {file}: {e}")
        skipped += 1

print(f"\nImage Conversion Statistics:")
print(f"Success: {converted} images")
print(f"Skipped: {skipped} images")


In [None]:
## Label Conversion

Convert GeoJSON labels to YOLO format.


In [None]:
# Take the size of the first converted JPG as the reference image size
image_size = None
first_jpg = next((name for name in os.listdir(dst_dir) if name.endswith('.jpg')), None)
if first_jpg is not None:
    with Image.open(os.path.join(dst_dir, first_jpg)) as img:
        image_size = img.size
        print(f"Reference image size: {image_size}")

# Counters for the summary printed at the end
converted_labels = 0
failed_labels = 0

if image_size:
    for img_file in os.listdir(dst_dir):
        if not img_file.endswith('.jpg'):
            continue

        img_id = img_file.replace('.jpg', '')
        geojson_path = os.path.join(
            "./spacenet/AOI_2_Vegas_Train/geojson/buildings",
            f"buildings_{img_id}.geojson",
        )
        label_path = os.path.join(label_dir, f"{img_id}.txt")

        # geojson_to_yolo reports its own warning and returns False on failure
        if not geojson_to_yolo(geojson_path, image_size, label_path):
            failed_labels += 1
            continue

        converted_labels += 1
        if converted_labels % 100 == 0:
            print(f"Progress: {converted_labels} labels converted...")

print(f"\nLabel Conversion Statistics:")
print(f"Success: {converted_labels} labels")
print(f"Failed: {failed_labels} labels")


In [None]:
## Dataset Split

Split the dataset into training and validation sets.


In [None]:
def _has_label(img_file):
    """Return True if a non-empty YOLO label file exists for the given JPG name."""
    label_path = os.path.join(label_dir, f"{img_file.replace('.jpg', '')}.txt")
    return os.path.exists(label_path) and os.path.getsize(label_path) > 0


def copy_files(images, img_dir, label_dir_src, label_dir_dst):
    """Copy images and their label files into a split directory.

    Args:
        images: JPG file names (relative to ``dst_dir``) to copy.
        img_dir: Destination directory for the images.
        label_dir_src: Directory holding the source ``.txt`` label files.
        label_dir_dst: Destination directory for the label files.

    Returns:
        Number of image/label pairs copied. A pair that fails to copy is
        reported with a warning and not counted.
    """
    copied = 0
    for img_file in images:
        try:
            img_id = img_file.replace('.jpg', '')
            shutil.copy(
                os.path.join(dst_dir, img_file),
                os.path.join(img_dir, img_file)
            )
            shutil.copy(
                os.path.join(label_dir_src, f"{img_id}.txt"),
                os.path.join(label_dir_dst, f"{img_id}.txt")
            )
            copied += 1
        except Exception as e:
            print(f"Warning: Could not copy {img_file}: {e}")
    return copied


# List labeled images.
# os.listdir order depends on the filesystem, so the list is sorted: otherwise
# random_state=42 would not give the same split on every machine / run.
all_images = sorted(
    img_file for img_file in os.listdir(dst_dir)
    if img_file.endswith('.jpg') and _has_label(img_file)
)

# train_test_split cannot split fewer than two samples
if len(all_images) >= 2:
    # Split train/val
    train_imgs, val_imgs = train_test_split(all_images, test_size=0.2, random_state=42)

    # Create directories
    train_img_dir = "./spacenet/images/train"
    val_img_dir = "./spacenet/images/val"
    train_label_dir = "./spacenet/labels/train"
    val_label_dir = "./spacenet/labels/val"

    for dir_path in [train_img_dir, val_img_dir, train_label_dir, val_label_dir]:
        os.makedirs(dir_path, exist_ok=True)

    # Copy files
    train_copied = copy_files(train_imgs, train_img_dir, label_dir, train_label_dir)
    val_copied = copy_files(val_imgs, val_img_dir, label_dir, val_label_dir)

    print(f"\nDataset Split Statistics:")
    print(f"Training: {train_copied} images")
    print(f"Validation: {val_copied} images")
else:
    # Make the problem visible here instead of failing later during training
    print(f"Warning: need at least 2 labeled images to split, found {len(all_images)}.")


In [None]:
## YOLOv8 Configuration

Create the dataset configuration file for YOLOv8 training.


In [None]:
# YOLOv8 dataset configuration file.
# Only dataset keys belong here: Ultralytics reads training hyperparameters
# (epochs, batch, optimizer, augmentation settings, ...) from the arguments of
# model.train(), not from the dataset YAML, so listing them in this file would
# have no effect and only suggest settings that are never applied.
dataset_root = os.path.abspath("./spacenet")

yaml_content = f"""
# YOLOv8 configuration
path: {dataset_root}  # dataset root dir
train: images/train  # train images
val: images/val  # validation images

# Classes
nc: 1  # number of classes
names: ['building']  # class names
"""

with open("spacenet.yaml", "w") as f:
    f.write(yaml_content)
print("spacenet.yaml created successfully.")


In [None]:
## Model Training

Train the YOLOv8 model; the trained weights are converted to ONNX format in the next section.


In [None]:
from ultralytics import YOLO

# Start from the "yolov8n.pt" checkpoint
model = YOLO("yolov8n.pt")

# Human-readable summary of the run that is about to start
summary_lines = (
    "\nTraining Parameters:",
    "  - Model: YOLOv8n",
    "  - Epochs: 50",
    "  - Image size: 640x640",
    "  - Batch size: 16",
    "  - Optimizer: AdamW",
    "  - Learning rate: 0.001",
)
for line in summary_lines:
    print(line)

# Arguments handed to the trainer
train_kwargs = dict(
    data="spacenet.yaml",
    epochs=50,
    imgsz=640,
    batch=16,
    name='spacenet_buildings',
    optimizer='AdamW',
    lr0=0.001,
)

model.train(**train_kwargs)


In [None]:
## ONNX Export

Export the trained model to ONNX format and test it.


In [None]:
# ONNX export
print("\nExporting model to ONNX format...")
# export() returns the path of the file it wrote. After training, the model is
# backed by the run's best checkpoint, so the ONNX file is written next to that
# checkpoint and not to ./yolov8n.onnx. Copy it to the name the rest of the
# notebook expects.
exported_path = model.export(format="onnx")
if not exported_path or not os.path.exists(str(exported_path)):
    raise RuntimeError(f"ONNX export did not produce a file (got: {exported_path!r})")
exported_path = str(exported_path)

onnx_path = "yolov8n.onnx"
if os.path.abspath(exported_path) != os.path.abspath(onnx_path):
    shutil.copy(exported_path, onnx_path)
print("ONNX model created: yolov8n.onnx")

# ONNX model test: run one random input through the exported graph
print("\nTesting ONNX model...")
import onnxruntime
# The provider is named explicitly; GPU-enabled onnxruntime builds require it.
session = onnxruntime.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
input_name = session.get_inputs()[0].name
dummy_input = np.random.rand(1, 3, 640, 640).astype(np.float32)
output = session.run(None, {input_name: dummy_input})
print("ONNX model test successful!")


In [None]:
## Save Results

Save the trained model and results to Google Drive.


In [None]:
# Copy results to Drive.
# shutil is used instead of `!cp` so that a missing source or an unmounted
# Drive is reported instead of being followed by an unconditional success
# message.
drive_dir = "/content/drive/MyDrive"
run_dir = "runs/detect/spacenet_buildings"
onnx_file = "yolov8n.onnx"

problems = []
if not os.path.isdir(drive_dir):
    problems.append(f"{drive_dir} is not available (is Google Drive mounted?)")
else:
    if os.path.isdir(run_dir):
        # Like `cp -r`, merge into an already existing copy of the run directory
        shutil.copytree(
            run_dir,
            os.path.join(drive_dir, os.path.basename(run_dir)),
            dirs_exist_ok=True,
        )
    else:
        problems.append(f"{run_dir} not found")

    if os.path.isfile(onnx_file):
        shutil.copy(onnx_file, drive_dir)
    else:
        problems.append(f"{onnx_file} not found")

if problems:
    print("Warning: results were not fully saved:")
    for problem in problems:
        print(f"  - {problem}")
else:
    print("Results saved to Google Drive successfully.")
