<a href="https://colab.research.google.com/github/JaneGondwegithub/LULC_classification_and_prediction/blob/main/Patches_Extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import ee
ee.Authenticate()
ee.Initialize(project='lulc-experiment-2025')
print(ee.String('GEE is initialized').getInfo())

GEE is initialized


In [4]:
import ee
ee.Initialize(project='lulc-experiment-2025')
lulc = ee.ImageCollection('projects/sat-io/open-datasets/landcover/ESRI_Global-LULC_10m').mosaic()
print(lulc.bandNames().getInfo())

['b1']


In [5]:
import ee
ee.Initialize(project='lulc-experiment-2025')
sentinel2 = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED') \
    .filterDate('2020-01-01', '2020-12-31') \
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)) \
    .filterBounds(ee.Geometry.BBox(32.67, -17.13, 35.92, -9.37))
print(sentinel2.size().getInfo())  # Should return >0

1539


In [6]:
import ee
ee.Initialize(project='lulc-experiment-2025')
lulc = ee.ImageCollection('projects/sat-io/open-datasets/landcover/ESRI_Global-LULC_10m').mosaic()
CLASS_NAMES = ['Water', 'Trees', 'Flooded Vegetation', 'Crops', 'Built Area',
               'Bare Ground', 'Snow/Ice', 'Clouds', 'Rangeland']
CLASS_VALUES = list(range(1, 10))
region = ee.Geometry.BBox(33.5, -14.0, 34.5, -13.0)  # Smaller Malawi subregion
histogram = lulc.reduceRegion(
    reducer=ee.Reducer.frequencyHistogram(),
    geometry=region,
    scale=10,
    maxPixels=1e8,
    bestEffort=True
).get('b1').getInfo()
print("Class distribution:", histogram)
for class_value, class_name in zip(CLASS_VALUES, CLASS_NAMES):
    count = histogram.get(str(class_value), 0)
    print(f"{class_name} (value {class_value}): {count} pixels")

Class distribution: {'1': 9521357, '2': 5974033, '3': 38256, '4': 151531, '5': 19457483, '6': 60015955, '7': 4820375, '8': 21010}
Water (value 1): 9521357 pixels
Trees (value 2): 5974033 pixels
Flooded Vegetation (value 3): 38256 pixels
Crops (value 4): 151531 pixels
Built Area (value 5): 19457483 pixels
Bare Ground (value 6): 60015955 pixels
Snow/Ice (value 7): 4820375 pixels
Clouds (value 8): 21010 pixels
Rangeland (value 9): 0 pixels


In [None]:
import ee
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Authenticate and Initialize Earth Engine
ee.Authenticate()
ee.Initialize(project='lulc-experiment-2025')

# Mount Google Drive in Colab
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# CONFIGURATION
PATCH_SIZE = 128
S2_RES = 10
BANDS = ['B2', 'B3', 'B4', 'B8', 'B11', 'B12']
CLASS_NAMES = ['Water', 'Trees', 'Flooded Vegetation', 'Crops', 'Built Area', 'Bare Ground']
CLASS_VALUES = [1, 2, 3, 4, 5, 6]
PATCHES_PER_CLASS = 5000   # total number per class
BATCH_SIZE = 200           # how many per batch per class
EXPORT_FOLDER = 'lulc_patches'
REGION = ee.Geometry.BBox(33.5, -14.0, 34.5, -13.0)

# SET THESE FOR THIS BATCH!
START_IDX = 800              # inclusive
END_IDX = 1000                # exclusive

lulc = ee.ImageCollection('projects/sat-io/open-datasets/landcover/ESRI_Global-LULC_10m').mosaic()
s2 = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED') \
    .filterDate('2020-01-01', '2020-12-31') \
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)) \
    .filterBounds(REGION) \
    .mosaic()

def square_patch_geom(point, size_m):
    coords = point.geometry().coordinates()
    half = size_m / 2
    lon = ee.Number(coords.get(0))
    lat = ee.Number(coords.get(1))
    lon_offset = half / 111320
    lat_offset = half / 110540
    return ee.Geometry.Rectangle([
        [lon.subtract(lon_offset), lat.subtract(lat_offset)],
        [lon.add(lon_offset), lat.add(lat_offset)]
    ])

def sample_patch_centers(class_value, num_patches):
    mask = lulc.eq(class_value)
    points = mask.stratifiedSample(
        numPoints=num_patches,
        classBand='b1',
        region=REGION,
        scale=10,
        seed=42,
        geometries=True
    )
    return points

def export_patch(point, class_name, idx):
    geom = square_patch_geom(point, PATCH_SIZE * S2_RES)
    patch_img = s2.select(BANDS).clip(geom).toFloat()
    fname = f"{class_name}_patch_{idx:05d}"
    task = ee.batch.Export.image.toDrive(
        image=patch_img,
        description=fname,
        folder=EXPORT_FOLDER,
        fileNamePrefix=fname,
        region=geom.getInfo()['coordinates'],
        scale=S2_RES,
        maxPixels=1e8
    )
    task.start()
    logging.info(f"Started export task: {fname}")
    return task

# For each class, sample all patch centers ONCE (not per batch)
for class_val, class_name in zip(CLASS_VALUES, CLASS_NAMES):
    logging.info(f"Sampling {PATCHES_PER_CLASS} centers for {class_name} (class value {class_val})")
    points = sample_patch_centers(class_val, PATCHES_PER_CLASS)
    points_list = points.toList(PATCHES_PER_CLASS)
    # Only export this batch
    for i in range(START_IDX, min(END_IDX, PATCHES_PER_CLASS)):
        pt = ee.Feature(points_list.get(i))
        export_patch(pt, class_name, i)

logging.info(f"Submitted export tasks for patches {START_IDX} to {END_IDX-1} for each class.")
print(f"Export tasks submitted for patches {START_IDX} to {END_IDX-1} for each class.")
print("Check the Earth Engine Code Editor > Tasks tab for progress and download patches from your Google Drive when done.")