In [1]:
!pip install rasterio
!pip install geopandas
!pip install ultralytics

Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Collecting sympy==1.13.1 (from torch>=1.8.0->ultralytics)
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading sympy-1.13.1-py3-none-any.whl (6.2 MB)
   ---------------------------------------- 0.0/6.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/6.2 MB ? eta -:--:--
   - -------------------------------------- 0.3/6.2 MB ? eta -:--:--
   - -------------------------------------- 0.3/6.2 MB ? eta -:--:--
   --- ------------------------------------ 0.5/6.2 MB 762.0 kB/s eta 0:00:08
   --- ------------------------------------ 0.5/6.2 MB 762.0 kB/s eta 0:00:08
   ----- ---------------------------------- 0.8/6.2 MB 817.9 kB/s eta 0:00:07
   ------ --------------------------------- 1.0/6.2 MB 774.0 kB/s eta 0:00:07
   ------ --------------------------------- 1.0/6.2 MB 774.0 kB/s eta 0:00:07
   -------- --------------------------

In [2]:
import os, random, shutil, rasterio, torch

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

from PIL import Image
from tqdm import tqdm

from rasterio.windows import Window
from rasterio.features import shapes
from rasterio.transform import Affine
from shapely.geometry import Polygon, MultiPolygon, box, shape

from ultralytics import YOLO

In [3]:
def _iter_polygons(geom):
    """Yield each Polygon contained in ``geom``, skipping points, lines and empties."""
    if geom is None or geom.is_empty:
        return
    if isinstance(geom, Polygon):
        yield geom
    elif hasattr(geom, "geoms"):
        # MultiPolygon / GeometryCollection: recurse so that stray points or
        # lines left by edge-touching intersections are dropped instead of
        # being dereferenced via ``.exterior``.
        for part in geom.geoms:
            yield from _iter_polygons(part)


def _ring_to_yolo_points(ring_coords, to_pixel, tile_w, tile_h):
    """Convert a ring in map coordinates to normalised (x, y) tile points in [0, 1]."""
    points = []
    for coord in ring_coords:
        # Index instead of unpacking so 3D (x, y, z) vertices are accepted.
        px, py = to_pixel * (coord[0], coord[1])
        # Clamp rather than drop: the geometry is already clipped to the tile,
        # so anything outside [0, 1] is floating-point noise on the border.
        point = (
            round(min(max(px / tile_w, 0.0), 1.0), 6),
            round(min(max(py / tile_h, 0.0), 1.0), 6),
        )
        if not points or point != points[-1]:
            points.append(point)
    # Shapely rings repeat the first vertex at the end; the label line only
    # needs each vertex once.
    if len(points) > 1 and points[0] == points[-1]:
        points.pop()
    return points


def tile_and_label(
    image_path,
    shapefile_path,
    output_image_dir,
    output_label_dir,
    tile_size=256,
    class_id=1
):
    """Cut a raster into tiles and write a polygon label file for each tile.

    The raster is walked in ``tile_size`` steps, row by row. Every tile is
    saved as ``tile_NNNNN.jpg`` (bands 1-3) in ``output_image_dir`` and gets a
    ``tile_NNNNN.txt`` in ``output_label_dir`` holding one line per polygon:
    ``class_id x1 y1 x2 y2 ...`` with coordinates normalised to the tile's own
    width and height. Tiles without any polygon get an empty label file.

    Args:
        image_path: Path of the raster opened with ``rasterio.open``.
        shapefile_path: Path of the vector file read with ``gpd.read_file``;
            it is reprojected to the raster's CRS.
        output_image_dir: Directory for the JPEG tiles (created if missing).
        output_label_dir: Directory for the label files (created if missing).
        tile_size: Tile edge length in pixels. Tiles on the right/bottom edge
            of the raster are smaller when the raster size is not a multiple
            of ``tile_size``.
        class_id: Class index written at the start of every label line.
            NOTE(review): YOLO class indices are normally zero-based; the
            default of 1 is kept for backward compatibility - confirm it
            matches the dataset YAML.

    Raises:
        ValueError: If ``tile_size`` is not positive.

    Only exterior rings are written; polygon holes are ignored.
    """
    if tile_size <= 0:
        raise ValueError(f"tile_size must be a positive integer, got {tile_size!r}")

    # Load shapefile
    gdf = gpd.read_file(shapefile_path)

    # Open orthophoto
    with rasterio.open(image_path) as src:
        width = src.width
        height = src.height
        gdf = gdf.to_crs(src.crs)

        os.makedirs(output_image_dir, exist_ok=True)
        os.makedirs(output_label_dir, exist_ok=True)

        tile_id = 0

        for y in tqdm(range(0, height, tile_size), desc="Rows"):
            # Clamp the window to the raster so edge tiles are described (and
            # later normalised) by their real size, not the nominal tile_size.
            tile_h = min(tile_size, height - y)
            for x in range(0, width, tile_size):
                tile_w = min(tile_size, width - x)
                window = Window(x, y, tile_w, tile_h)
                # Inverse of the tile's affine transform: map coords -> tile pixels.
                to_pixel = ~src.window_transform(window)

                # Read tile and convert to RGB
                img_tile = src.read([1, 2, 3], window=window)
                img_tile = np.transpose(img_tile, (1, 2, 0))  # HWC
                # NOTE(review): assumes bands 1-3 already hold 0-255 display
                # values; other value ranges are truncated by this cast - confirm.
                img_tile = np.clip(img_tile, 0, 255).astype(np.uint8)

                stem = f"tile_{tile_id:05d}"
                img_path = os.path.join(output_image_dir, f"{stem}.jpg")
                label_path = os.path.join(output_label_dir, f"{stem}.txt")
                tile_id += 1

                # Save image tile
                Image.fromarray(img_tile).save(img_path)

                # Tile footprint in map coordinates
                tile_bounds = box(*rasterio.windows.bounds(window, src.transform))

                # Geometries that intersect the tile (may be none)
                hits = gdf.geometry[gdf.intersects(tile_bounds)]

                # Opening in 'w' mode always creates the file, so tiles without
                # polygons end up with the empty label file YOLO expects.
                with open(label_path, 'w') as f:
                    for geom in hits:
                        clipped_geom = geom.intersection(tile_bounds)
                        for poly in _iter_polygons(clipped_geom):
                            points = _ring_to_yolo_points(
                                poly.exterior.coords, to_pixel, tile_w, tile_h
                            )
                            # A polygon needs at least three distinct vertices.
                            if len(points) < 3:
                                continue

                            coords_flat = [f"{v:.6f}" for xy in points for v in xy]
                            f.write(f"{class_id} {' '.join(coords_flat)}\n")

In [None]:
# Tile the S2DR3 raster and write one label file per tile.
# The arguments are collected in a mapping first so they can be inspected or
# tweaked before the (slow) tiling run is started.
tiling_kwargs = {
    "image_path": "/content/drive/MyDrive/AGRI/Segment/S2DR3/sentinel-2_indices.tif",
    "shapefile_path": "/content/drive/MyDrive/AGRI/Segment/S2DR3/shp/boundary_CropField_NuevaEcija.shp",
    "output_image_dir": "/content/drive/MyDrive/AGRI/Segment/S2DR3/images",
    "output_label_dir": "/content/drive/MyDrive/AGRI/Segment/S2DR3/labels",
    "tile_size": 256,
    "class_id": 1,
}
tile_and_label(**tiling_kwargs)

In [None]:
# Copy the tiled images and their label files into a YOLO train/val layout.
image_dir = '/content/drive/MyDrive/AGRI/Segment/S2DR3/images'
label_dir = '/content/drive/MyDrive/AGRI/Segment/S2DR3/labels'

train_images = '/content/drive/MyDrive/AGRI/Segment/S2DR3/yolo/images/train'
val_images = '/content/drive/MyDrive/AGRI/Segment/S2DR3/yolo/images/val'
train_labels = '/content/drive/MyDrive/AGRI/Segment/S2DR3/yolo/labels/train'
val_labels = '/content/drive/MyDrive/AGRI/Segment/S2DR3/yolo/labels/val'

for folder in (train_images, val_images, train_labels, val_labels):
    os.makedirs(folder, exist_ok=True)

SPLIT_SEED = 42        # fixed seed so the split is the same on every run
TRAIN_FRACTION = 0.8   # share of tiles that goes to the training set

# Keep only the image tiles and sort them first: os.listdir order is arbitrary,
# so sorting plus a seeded shuffle is what makes the split reproducible. With
# an unchanged image folder, re-running this cell copies every tile to the same
# side again instead of leaking tiles between train and val.
files = sorted(
    name for name in os.listdir(image_dir) if name.lower().endswith('.jpg')
)
random.Random(SPLIT_SEED).shuffle(files)
split = int(TRAIN_FRACTION * len(files))

for subset, images_dst, labels_dst in (
    (files[:split], train_images, train_labels),
    (files[split:], val_images, val_labels),
):
    for f in subset:
        # Swap only the extension; str.replace would touch every '.jpg' in the name.
        label_name = os.path.splitext(f)[0] + '.txt'
        shutil.copy(os.path.join(image_dir, f), images_dst)
        shutil.copy(os.path.join(label_dir, label_name), labels_dst)